├── .circleci └── config.yml ├── .clang-format ├── .github └── FUNDING.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── analyzer.c ├── ast.c ├── compilium.c ├── compilium.h ├── examples ├── .gitignore ├── Makefile ├── README.md ├── calc.c ├── collatz.c ├── constsum.c ├── ctests.c ├── fib.c ├── gameoflife.c ├── gen_constsum.js ├── hello.c ├── jsondump.c ├── jsondump_test.sh ├── measure_collatz.sh ├── measure_constsum.sh ├── measure_fib.sh ├── measure_pi.sh └── pi.c ├── generator.c ├── include ├── stdarg.h ├── stdbool.h ├── stdio.h ├── stdlib.h └── string.h ├── linkage_test ├── Makefile ├── external.c ├── linkage_test.bin ├── linkage_test.c └── linkage_test.host.bin ├── parser.c ├── preprocessor.c ├── struct.c ├── symbol.c ├── test.sh ├── test_preprocess.sh ├── token.c ├── tokenizer.c └── type.c /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | jobs: 3 | build: 4 | docker: 5 | - image: hikalium/ubuntu-for-compilium:latest 6 | steps: 7 | - checkout 8 | - run: apt install time 9 | - run: 10 | name: Run test 11 | command: make testall CC=clang 12 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Google 2 | AlignEscapedNewlines: DontAlign 3 | AlignConsecutiveDeclarations: false 4 | AlignTrailingComments: false 5 | PointerAlignment: Right 6 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [hikalium] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi 
username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Object files 5 | *.o 6 | *.ko 7 | *.obj 8 | *.elf 9 | 10 | # Linker output 11 | *.ilk 12 | *.map 13 | *.exp 14 | 15 | # Precompiled Headers 16 | *.gch 17 | *.pch 18 | 19 | # Libraries 20 | *.lib 21 | *.a 22 | *.la 23 | *.lo 24 | 25 | # Shared objects (inc. Windows DLLs) 26 | *.dll 27 | *.so 28 | *.so.* 29 | *.dylib 30 | 31 | # Executables 32 | *.exe 33 | *.out 34 | *.app 35 | *.i*86 36 | *.x86_64 37 | *.hex 38 | 39 | # Debug files 40 | *.dSYM/ 41 | *.su 42 | *.idb 43 | *.pdb 44 | 45 | # Kernel Module Compile Results 46 | *.mod* 47 | *.cmd 48 | .tmp_versions/ 49 | modules.order 50 | Module.symvers 51 | Mkfile.old 52 | dkms.conf 53 | 54 | *.swp 55 | compilium 56 | compilium_self 57 | compilium_dbg 58 | compilium_unittest 59 | failcase.c 60 | *.txt 61 | *.unittest 62 | .DS_Store 63 | *.self.c 64 | *.S 65 | *.log 66 | *.preprocess.c 67 | a.out* 68 | expected.* 69 | out.* 70 | testinput.c 71 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 hikalium 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without 
restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: --------------------------------------------------------------------------------
# Build, test and debugging entry points for compilium (GNU Make).

# Flags and sources used to build the compiler itself.
CFLAGS=-Wall -Wpedantic -Wextra -Werror -Wconditional-uninitialized -std=c11
SRCS=analyzer.c ast.c compilium.c generator.c \
     parser.c preprocessor.c struct.c symbol.c token.c tokenizer.c type.c
HEADERS=compilium.h
CC=clang
# Source file fed to compilium_dbg by `make debug`; override on the command
# line, e.g. `make debug FAILCASE_FILE=examples/calc.c`.
FAILCASE_FILE:=failcase.c
LLDB_ARGS = -o 'settings set interpreter.prompt-on-quit false' \
            -o 'b __assert' \
            -o 'process launch'

# Plain `make` builds the compiler (this was the implicit default before).
.DEFAULT_GOAL := compilium

# Do not leave a half-written target behind when its recipe fails.
.DELETE_ON_ERROR:

# Command-like targets. Without this, `linkage_test` would be compared against
# the linkage_test/ directory and could be considered up to date.
.PHONY: debug testall test_preprocess test ctest linkage_test unittest \
        format commit clean

# Pattern rules cannot be declared phony; depending on .FORCE makes them
# always run instead.
.FORCE :

# Compile foo.c to foo.compilium.S with the freshly built compiler.
%.compilium.S : compilium %.c .FORCE
	./compilium -I include/ --target-os `uname` < $*.c > $@

compilium : $(SRCS) $(HEADERS) Makefile
	$(CC) $(CFLAGS) -o $@ $(SRCS)

compilium_dbg : $(SRCS) $(HEADERS) Makefile
	$(CC) $(CFLAGS) -g -o $@ $(SRCS)

# Run the debug build under lldb with $(FAILCASE_FILE) as stdin. The
# prerequisite follows the variable so an overridden path is what gets checked.
debug : compilium_dbg $(FAILCASE_FILE)
	lldb \
		-o 'settings set target.input-path $(FAILCASE_FILE)' $(LLDB_ARGS) \
		-- ./compilium_dbg -I include/ --target-os `uname`

testall : unittest ctest test linkage_test
	$(MAKE) -C examples

test_preprocess : compilium
	./test_preprocess.sh

test : compilium
	time ./test.sh

ctest : compilium
	$(MAKE) -C examples run_ctests

linkage_test : compilium
	$(MAKE) -C linkage_test test

unittest : run_unittest_List run_unittest_Type

# run_unittest_<Name> runs `./compilium --run-unittest=<Name>`.
run_unittest_% : compilium .FORCE
	@ ./compilium --run-unittest=$* || { echo "FAIL unittest.$*: Run 'make dbg_unittest_$*' to rerun this testcase with debugger"; exit 1; }

# dbg_unittest_<Name> reruns the same unit test under lldb.
dbg_unittest_% : compilium_dbg .FORCE
	lldb $(LLDB_ARGS)\
		-- ./compilium_dbg --run-unittest=$*

format:
	clang-format -i $(SRCS) $(HEADERS)
	$(MAKE) -C examples format

# Format, stage, review the diff, run every test, then commit.
commit:
	$(MAKE) format
	git add .
	git diff HEAD --color=always | less -R
	$(MAKE) testall
	git commit

clean:
	$(RM) -r compilium compilium_dbg
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # compilium v2 2 | [![CircleCI](https://circleci.com/gh/hikalium/compilium/tree/v2.svg?style=svg)](https://circleci.com/gh/hikalium/compilium/tree/v2) 3 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 4 | 5 | [WIP] C compiler + hikalium 6 | 7 | ## Build 8 | ``` 9 | make 10 | ``` 11 | 12 | ## Usage 13 | ``` 14 | ./compilium [--os_type=Linux|Darwin] 15 | ``` 16 | 17 | compilium takes stdin as an input, so you can compile your code like this (in bash): 18 | ``` 19 | ./compilium <<< "int main(){ return 0; }" 20 | ``` 21 | 22 | ## Test 23 | ``` 24 | make testall 25 | ``` 26 | 27 | ## Local CI 28 | ``` 29 | circleci config validate 30 | circleci local execute 31 | ``` 32 | 33 | ## Debug Tips 34 | ``` 35 | make debug FAILCASE_FILE=examples/calc.c 36 | 37 | ``` 38 | 39 | ## License 40 | MIT 41 | 42 | ## Author 43 | 
[hikalium](https://github.com/hikalium) 44 | -------------------------------------------------------------------------------- /analyzer.c: -------------------------------------------------------------------------------- 1 | #include "compilium.h" 2 | 3 | static struct Node *in_function; // ASTFuncDef 4 | static int reg_used_table[NUM_OF_SCRATCH_REGS]; 5 | static struct Node *reg_node_table[NUM_OF_SCRATCH_REGS]; 6 | 7 | static void AllocReg(struct Node *n) { 8 | assert(n); 9 | for (int i = 1; i <= NUM_OF_SCRATCH_REGS; i++) { 10 | if (!reg_used_table[i]) { 11 | reg_used_table[i] = 1; 12 | reg_node_table[i] = n; 13 | n->reg = i; 14 | return; 15 | } 16 | } 17 | fprintf(stderr, "\n**** Allocated regs ****\n"); 18 | for (int i = 1; i <= NUM_OF_SCRATCH_REGS; i++) { 19 | fprintf(stderr, "reg[%d]:\n", i); 20 | if (reg_node_table[i]->op) { 21 | PrintTokenLine(reg_node_table[i]->op); 22 | } else { 23 | fprintf(stderr, "Op info not found\n"); 24 | } 25 | } 26 | fprintf(stderr, "\n**** Tried to allocate reg for ****\n"); 27 | PrintASTNode(n); 28 | ErrorWithToken(n->op, "No free registers found"); 29 | } 30 | 31 | static void FreeReg(int reg) { 32 | assert(1 <= reg && reg <= NUM_OF_SCRATCH_REGS); 33 | reg_used_table[reg] = 0; 34 | reg_node_table[reg] = NULL; 35 | } 36 | 37 | static void AnalyzeNode(struct Node *node, struct SymbolEntry **ctx) { 38 | assert(node); 39 | if (node->type == kASTList && !node->op) { 40 | for (int i = 0; i < GetSizeOfList(node); i++) { 41 | AnalyzeNode(GetNodeAt(node, i), ctx); 42 | } 43 | return; 44 | } 45 | if (node->type == kASTExprFuncCall) { 46 | node->stack_size_needed = (GetLastLocalVarOffset(*ctx) + 0xF) & ~0xF; 47 | AllocReg(node); 48 | AnalyzeNode(node->func_expr, ctx); 49 | FreeReg(node->func_expr->reg); 50 | node->expr_type = 51 | GetReturnTypeOfFunction(GetTypeWithoutAttr(node->func_expr->expr_type)); 52 | for (int i = 0; i < GetSizeOfList(node->arg_expr_list); i++) { 53 | struct Node *n = GetNodeAt(node->arg_expr_list, i); 54 | 
AnalyzeNode(n, ctx); 55 | FreeReg(n->reg); 56 | } 57 | return; 58 | } else if (node->type == kASTFuncDef) { 59 | AddFuncDef(ctx, CreateTokenStr(node->func_name_token), node); 60 | struct SymbolEntry *saved_ctx = *ctx; 61 | struct Node *arg_type_list = GetArgTypeList(node->func_type); 62 | assert(arg_type_list); 63 | node->arg_var_list = AllocList(); 64 | for (int i = 0; i < GetSizeOfList(arg_type_list); i++) { 65 | struct Node *arg_type_with_attr = GetNodeAt(arg_type_list, i); 66 | struct Node *arg_ident_token = 67 | GetIdentifierTokenFromTypeAttr(arg_type_with_attr); 68 | if (!arg_ident_token) { 69 | PushToList(node->arg_var_list, NULL); 70 | continue; 71 | } 72 | struct Node *arg_type = GetTypeWithoutAttr(arg_type_with_attr); 73 | assert(arg_type); 74 | struct Node *local_var = 75 | AddLocalVar(ctx, CreateTokenStr(arg_ident_token), arg_type); 76 | PushToList(node->arg_var_list, local_var); 77 | } 78 | assert(!in_function); 79 | in_function = node; 80 | AnalyzeNode(node->func_body, ctx); 81 | in_function = NULL; 82 | *ctx = saved_ctx; 83 | return; 84 | } 85 | assert(node->op); 86 | if (node->type == kASTExpr) { 87 | if (IsTokenWithType(node->op, kTokenIntegerConstant) || 88 | IsTokenWithType(node->op, kTokenCharLiteral)) { 89 | AllocReg(node); 90 | node->expr_type = CreateTypeBase(CreateToken("int")); 91 | return; 92 | } else if (IsTokenWithType(node->op, kTokenStringLiteral)) { 93 | AllocReg(node); 94 | node->expr_type = CreateTypePointer(CreateTypeBase(CreateToken("char"))); 95 | return; 96 | } else if (IsEqualTokenWithCStr(node->op, "(")) { 97 | AnalyzeNode(node->right, ctx); 98 | node->reg = node->right->reg; 99 | node->expr_type = node->right->expr_type; 100 | return; 101 | } else if (IsEqualTokenWithCStr(node->op, "[")) { 102 | AnalyzeNode(node->left, ctx); 103 | AnalyzeNode(node->right, ctx); 104 | node->reg = node->left->reg; 105 | FreeReg(node->right->reg); 106 | assert(node->left->expr_type); 107 | struct Node *left_type = 
GetTypeWithoutAttr(node->left->expr_type); 108 | if (left_type->type == kTypeArray) { 109 | node->expr_type = CreateTypeLValue(left_type->type_array_type_of); 110 | } else if (left_type->type == kTypePointer) { 111 | node->expr_type = CreateTypeLValue(left_type->right); 112 | } else { 113 | assert(false); 114 | } 115 | return; 116 | } else if (IsEqualTokenWithCStr(node->op, ".") || 117 | IsEqualTokenWithCStr(node->op, "->")) { 118 | AnalyzeNode(node->left, ctx); 119 | node->reg = node->left->reg; 120 | PrintASTNode(node->left->expr_type); 121 | assert(node->right && node->right->type == kNodeToken); 122 | struct Node *struct_type = NULL; 123 | if (IsEqualTokenWithCStr(node->op, ".")) { 124 | if (GetTypeWithoutAttr(node->left->expr_type)->type != kTypeStruct) { 125 | ErrorWithToken(node->op, "left operand is not a struct"); 126 | } 127 | struct_type = node->left->expr_type; 128 | } 129 | if (IsEqualTokenWithCStr(node->op, "->")) { 130 | struct Node *left_type = GetTypeWithoutAttr(node->left->expr_type); 131 | PrintASTNode(left_type); 132 | assert(left_type->type == kTypePointer); 133 | struct Node *left_deref_type = left_type->right; 134 | assert(left_deref_type->type == kTypeStruct); 135 | struct_type = left_deref_type; 136 | } 137 | assert(struct_type); 138 | struct Node *member = FindStructMember(struct_type, node->right); 139 | if (!member) { 140 | ErrorWithToken(node->right, "Member name not found in struct"); 141 | } 142 | PrintASTNode(member); 143 | node->byte_offset = member->struct_member_ent_ofs; 144 | node->expr_type = 145 | CreateTypeLValue(GetTypeWithoutAttr(member->struct_member_ent_type)); 146 | return; 147 | } else if (IsTokenWithType(node->op, kTokenIdent)) { 148 | struct Node *ident_info = FindLocalVar(*ctx, node->op); 149 | if (ident_info) { 150 | node->byte_offset = ident_info->byte_offset; 151 | AllocReg(node); 152 | enum NodeType expr_type = 153 | GetTypeWithoutAttr(ident_info->expr_type)->type; 154 | if (expr_type == kTypeStruct || expr_type 
== kTypeArray) { 155 | node->expr_type = ident_info->expr_type; 156 | return; 157 | } 158 | node->expr_type = CreateTypeLValue(ident_info->expr_type); 159 | return; 160 | } 161 | struct Node *global_var_type = FindGlobalVar(*ctx, node->op); 162 | if (global_var_type) { 163 | AllocReg(node); 164 | node->expr_type = CreateTypeLValue(global_var_type); 165 | return; 166 | } 167 | struct Node *external_var_type = FindExternVar(*ctx, node->op); 168 | if (external_var_type) { 169 | AllocReg(node); 170 | node->expr_type = CreateTypeLValue(external_var_type); 171 | return; 172 | } 173 | struct Node *func_def = FindFuncDef(*ctx, node->op); 174 | if (func_def) { 175 | AllocReg(node); 176 | node->expr_type = func_def->func_type; 177 | return; 178 | } 179 | struct Node *func_decl_type = FindFuncDeclType(*ctx, node->op); 180 | if (func_decl_type) { 181 | AllocReg(node); 182 | node->expr_type = GetTypeWithoutAttr(func_decl_type); 183 | return; 184 | } 185 | ErrorWithToken(node->op, "Unknown identifier"); 186 | } else if (node->cond) { 187 | AnalyzeNode(node->cond, ctx); 188 | AnalyzeNode(node->left, ctx); 189 | AnalyzeNode(node->right, ctx); 190 | FreeReg(node->left->reg); 191 | FreeReg(node->right->reg); 192 | assert( 193 | IsSameTypeExceptAttr(node->left->expr_type, node->right->expr_type)); 194 | node->reg = node->cond->reg; 195 | node->expr_type = GetRValueType(node->right->expr_type); 196 | return; 197 | } else if (!node->left && node->right) { 198 | AnalyzeNode(node->right, ctx); 199 | if (IsEqualTokenWithCStr(node->op, "--") || 200 | IsEqualTokenWithCStr(node->op, "++")) { 201 | assert(IsLValueType(node->right->expr_type)); 202 | node->reg = node->right->reg; 203 | node->expr_type = GetRValueType(node->right->expr_type); 204 | return; 205 | } 206 | if (IsTokenWithType(node->op, kTokenKwSizeof)) { 207 | FreeReg(node->right->reg); 208 | AllocReg(node); 209 | node->expr_type = CreateTypeBase(CreateToken("int")); 210 | return; 211 | } 212 | node->reg = node->right->reg; 213 | 
if (IsEqualTokenWithCStr(node->op, "&")) { 214 | node->expr_type = 215 | CreateTypePointer(GetRValueType(node->right->expr_type)); 216 | return; 217 | } 218 | if (IsEqualTokenWithCStr(node->op, "*")) { 219 | struct Node *rtype = GetRValueType(node->right->expr_type); 220 | assert(rtype && rtype->type == kTypePointer); 221 | node->expr_type = CreateTypeLValue(rtype->right); 222 | return; 223 | } 224 | node->expr_type = GetRValueType(node->right->expr_type); 225 | return; 226 | } else if (node->left && !node->right) { 227 | // Postfix op 228 | if (IsEqualTokenWithCStr(node->op, "++") || 229 | IsEqualTokenWithCStr(node->op, "--")) { 230 | AnalyzeNode(node->left, ctx); 231 | assert(IsLValueType(node->left->expr_type)); 232 | node->reg = node->left->reg; 233 | node->expr_type = GetRValueType(node->left->expr_type); 234 | return; 235 | } 236 | } else if (node->left && node->right) { 237 | AnalyzeNode(node->left, ctx); 238 | AnalyzeNode(node->right, ctx); 239 | if (IsEqualTokenWithCStr(node->op, "=") || 240 | IsEqualTokenWithCStr(node->op, ",")) { 241 | FreeReg(node->left->reg); 242 | node->reg = node->right->reg; 243 | node->expr_type = GetRValueType(node->right->expr_type); 244 | return; 245 | } 246 | FreeReg(node->right->reg); 247 | node->reg = node->left->reg; 248 | node->expr_type = GetRValueType(node->left->expr_type); 249 | return; 250 | } 251 | assert(false); 252 | } 253 | if (node->type == kASTExprStmt) { 254 | if (!node->left) return; 255 | AnalyzeNode(node->left, ctx); 256 | if (node->left->reg) FreeReg(node->left->reg); 257 | return; 258 | } else if (node->type == kASTList) { 259 | struct SymbolEntry *saved_ctx = *ctx; 260 | for (int i = 0; i < GetSizeOfList(node); i++) { 261 | AnalyzeNode(GetNodeAt(node, i), ctx); 262 | } 263 | *ctx = saved_ctx; 264 | return; 265 | } else if (node->type == kASTDecl) { 266 | struct Node *raw_type = CreateTypeInContext(*ctx, node->op, node->right); 267 | PrintASTNode(raw_type); 268 | assert(raw_type); 269 | struct Node 
*type_ident = NULL; 270 | if (raw_type && raw_type->type == kTypeAttrIdent) { 271 | type_ident = raw_type->left; 272 | } 273 | struct Node *type = GetTypeWithoutAttr(raw_type); 274 | assert(type); 275 | 276 | if (!in_function) { 277 | // Top-level definitions 278 | if (IsASTDeclOfTypedef(node)) { 279 | return; 280 | } 281 | if (type_ident && type->type == kTypeFunction) { 282 | AddFuncDeclType(ctx, CreateTokenStr(type_ident), raw_type); 283 | return; 284 | } 285 | if (!type_ident && type->type == kTypeStruct) { 286 | struct Node *spec = type->type_struct_spec; 287 | ResolveTypesOfMembersOfStruct(*ctx, spec); 288 | assert(type->tag); 289 | AddStructType(ctx, CreateTokenStr(type->tag), type); 290 | return; 291 | } 292 | assert(type_ident); 293 | if (IsASTDeclOfExtern(node)) { 294 | AddExternVar(ctx, CreateTokenStr(type_ident), type); 295 | } else { 296 | AddGlobalVar(ctx, CreateTokenStr(type_ident), type); 297 | } 298 | assert(node->right->type == kASTDecltor); 299 | if (node->right->decltor_init_expr) { 300 | assert(false); 301 | } 302 | return; 303 | } 304 | // Local definitions 305 | assert(type_ident); 306 | AddLocalVar(ctx, CreateTokenStr(type_ident), type); 307 | assert(node->right->type == kASTDecltor); 308 | if (node->right->decltor_init_expr) { 309 | struct Node *left_expr = AllocNode(kASTExpr); 310 | left_expr->op = type_ident; 311 | node->right->decltor_init_expr->left = left_expr; 312 | AnalyzeNode(node->right->decltor_init_expr, ctx); 313 | FreeReg(node->right->decltor_init_expr->reg); 314 | } 315 | return; 316 | } else if (node->type == kASTJumpStmt) { 317 | if (IsTokenWithType(node->op, kTokenKwBreak) || 318 | IsTokenWithType(node->op, kTokenKwContinue)) { 319 | return; 320 | } 321 | if (IsTokenWithType(node->op, kTokenKwReturn)) { 322 | if (!node->right) return; 323 | AnalyzeNode(node->right, ctx); 324 | FreeReg(node->right->reg); 325 | return; 326 | } 327 | } else if (node->type == kASTSelectionStmt) { 328 | if (IsTokenWithType(node->op, kTokenKwIf)) 
{ 329 | AnalyzeNode(node->cond, ctx); 330 | FreeReg(node->cond->reg); 331 | AnalyzeNode(node->if_true_stmt, ctx); 332 | if (node->if_else_stmt) { 333 | AnalyzeNode(node->if_else_stmt, ctx); 334 | } 335 | return; 336 | } 337 | } else if (node->type == kASTForStmt) { 338 | if (node->init) { 339 | AnalyzeNode(node->init, ctx); 340 | if (node->init->reg) FreeReg(node->init->reg); 341 | } 342 | if (node->cond) { 343 | AnalyzeNode(node->cond, ctx); 344 | FreeReg(node->cond->reg); 345 | } 346 | if (node->updt) { 347 | AnalyzeNode(node->updt, ctx); 348 | FreeReg(node->updt->reg); 349 | } 350 | AnalyzeNode(node->body, ctx); 351 | return; 352 | } else if (node->type == kASTWhileStmt) { 353 | AnalyzeNode(node->cond, ctx); 354 | FreeReg(node->cond->reg); 355 | AnalyzeNode(node->body, ctx); 356 | return; 357 | } 358 | ErrorWithToken(node->op, "AnalyzeNode: Not implemented"); 359 | }

// Entry point of the analysis pass: walks `ast` with AnalyzeNode starting
// from an empty symbol context and outside of any function.
// Returns root context of symbols (including global vars).
struct SymbolEntry *Analyze(struct Node *ast) {
  struct SymbolEntry *root_ctx = NULL;
  in_function = NULL;
  AnalyzeNode(ast, &root_ctx);
  return root_ctx;
}
-------------------------------------------------------------------------------- /ast.c: --------------------------------------------------------------------------------
#include "compilium.h"

// True when `n` is a non-NULL list node.
bool IsASTList(struct Node *n) { return n && n->type == kASTList; }

// True when `n` is a declaration whose first specifier token (element 0 of
// the list in n->op) is the `typedef` keyword.
bool IsASTDeclOfTypedef(struct Node *n) {
  return (n && n->type == kASTDecl &&
          IsTokenWithType(GetNodeAt(n->op, 0), kTokenKwTypedef));
}

// True when `n` is a declaration whose first specifier token (element 0 of
// the list in n->op) is the `extern` keyword.
bool IsASTDeclOfExtern(struct Node *n) {
  return (n && n->type == kASTDecl &&
          IsTokenWithType(GetNodeAt(n->op, 0), kTokenKwExtern));
}

// Allocates a zero-initialised node of the given type. Allocation failure is
// treated as fatal (assert), matching how the list code checks realloc().
struct Node *AllocNode(enum NodeType type) {
  struct Node *node = calloc(1, sizeof(struct Node));
  assert(node);
  node->type = type;
  return node;
}

// Builds the expression node `left <t> right`. Reports an error at token `t`
// when the right operand is missing.
struct Node *CreateASTBinOp(struct Node *t, struct Node *left,
                            struct Node *right) {
  if (!right) ErrorWithToken(t, "Expected expression after binary operator");
  struct Node *op = AllocNode(kASTExpr);
  op->op = t;
  op->left = left;
  op->right = right;
  return op;
}

// Builds the expression node `<t> right` (left stays NULL). Reports an error
// at token `t` when the operand is missing.
struct Node *CreateASTUnaryPrefixOp(struct Node *t, struct Node *right) {
  if (!right) ErrorWithToken(t, "Expected expression after prefix operator");
  struct Node *op = AllocNode(kASTExpr);
  op->op = t;
  op->right = right;
  return op;
}

// Builds the expression node `left <t>` (right stays NULL). Reports an error
// at token `t` when the operand is missing.
struct Node *CreateASTUnaryPostfixOp(struct Node *left, struct Node *t) {
  assert(IsToken(t));
  // The message previously said "prefix operator", which was misleading here.
  if (!left) ErrorWithToken(t, "Expected expression before postfix operator");
  struct Node *op = AllocNode(kASTExpr);
  op->op = t;
  op->left = left;
  return op;
}

// Builds an expression-statement node; `left` is the expression and may be
// NULL (the analyzer returns early in that case).
struct Node *CreateASTExprStmt(struct Node *t, struct Node *left) {
  struct Node *op = AllocNode(kASTExprStmt);
  op->op = t;
  op->left = left;
  return op;
}

// Builds a function-definition node from its declaration and body list.
// The declaration is converted with CreateTypeFromDecl; the identifier token
// on the resulting type becomes func_name_token and the type without its
// attribute wrapper, which must be a function type, becomes func_type.
struct Node *CreateASTFuncDef(struct Node *func_decl, struct Node *func_body) {
  assert(func_decl && func_decl->type == kASTDecl);
  assert(IsASTList(func_body));
  struct Node *n = AllocNode(kASTFuncDef);
  n->func_body = func_body;
  struct Node *type = CreateTypeFromDecl(func_decl);
  assert(type);
  n->func_name_token = type->left;
  assert(IsToken(n->func_name_token));
  n->func_type = GetTypeWithoutAttr(type);
  assert(n->func_type && n->func_type->type == kTypeFunction);
  return n;
}

// Builds a key/value pair node. `key` is stored by pointer, not copied.
struct Node *CreateASTKeyValue(const char *key, struct Node *value) {
  struct Node *n = AllocNode(kASTKeyValue);
  n->key = key;
  n->value = value;
  return n;
}

// Builds a local-variable node carrying its byte offset and its type.
struct Node *CreateASTLocalVar(int byte_offset, struct Node *var_type) {
  struct Node *n = AllocNode(kASTLocalVar);
  n->byte_offset = byte_offset;
  n->expr_type = var_type;
  return n;
}

82 | struct Node *CreateTypeBase(struct Node *t) { 83 | struct Node *n = 
AllocNode(kTypeBase); 84 | n->op = t; 85 | return n; 86 | } 87 | 88 | struct Node *CreateTypeLValue(struct Node *type) { 89 | struct Node *n = AllocNode(kTypeLValue); 90 | n->right = type; 91 | return n; 92 | } 93 | 94 | struct Node *CreateTypePointer(struct Node *type) { 95 | struct Node *n = AllocNode(kTypePointer); 96 | n->right = type; 97 | return n; 98 | } 99 | 100 | struct Node *CreateTypeFunction(struct Node *return_type, 101 | struct Node *arg_type_list) { 102 | assert(IsASTList(arg_type_list)); 103 | struct Node *n = AllocNode(kTypeFunction); 104 | n->left = return_type; 105 | n->right = arg_type_list; 106 | return n; 107 | } 108 | 109 | struct Node *GetReturnTypeOfFunction(struct Node *func_type) { 110 | assert(func_type && func_type->type == kTypeFunction); 111 | return func_type->left; 112 | } 113 | 114 | struct Node *GetArgTypeList(struct Node *func_type) { 115 | assert(func_type && func_type->type == kTypeFunction); 116 | return func_type->right; 117 | } 118 | 119 | struct Node *CreateTypeStruct(struct Node *tag_token, 120 | struct Node *struct_spec) { 121 | assert(IsToken(tag_token)); 122 | struct Node *n = AllocNode(kTypeStruct); 123 | n->tag = tag_token; 124 | n->type_struct_spec = struct_spec; 125 | return n; 126 | } 127 | 128 | struct Node *CreateTypeAttrIdent(struct Node *ident_token, struct Node *type) { 129 | assert(ident_token && IsToken(ident_token)); 130 | struct Node *n = AllocNode(kTypeAttrIdent); 131 | n->left = ident_token; 132 | n->right = type; 133 | return n; 134 | } 135 | 136 | struct Node *CreateASTIdent(struct Node *ident) { 137 | assert(IsToken(ident)); 138 | struct Node *n = AllocNode(kASTIdent); 139 | n->op = ident; 140 | return n; 141 | } 142 | 143 | struct Node *CreateTypeArray(struct Node *type_of, struct Node *index_decl) { 144 | struct Node *n = AllocNode(kTypeArray); 145 | n->type_array_type_of = type_of; 146 | n->type_array_index_decl = index_decl; 147 | return n; 148 | } 149 | 150 | struct Node 
*CreateMacroReplacement(struct Node *args_tokens, 151 | struct Node *to_tokens) { 152 | struct Node *n = AllocNode(kNodeMacroReplacement); 153 | n->arg_expr_list = args_tokens; 154 | n->value = to_tokens; 155 | return n; 156 | } 157 | 158 | static void PrintPadding(int depth) { 159 | for (int i = 0; i < depth; i++) { 160 | fputc(' ', stderr); 161 | } 162 | } 163 | 164 | static void PrintASTNodeSub(struct Node *n, int depth) { 165 | if (!n) { 166 | fprintf(stderr, "(null)"); 167 | return; 168 | } 169 | if (IsToken(n)) { 170 | PrintTokenBrief(n); 171 | return; 172 | } 173 | if (n->type == kASTList) { 174 | fprintf(stderr, "["); 175 | if (GetSizeOfList(n) == 0) { 176 | fprintf(stderr, "]"); 177 | return; 178 | } 179 | for (int i = 0; i < GetSizeOfList(n); i++) { 180 | fprintf(stderr, "%s\n", i ? "," : ""); 181 | PrintPadding(depth + 1); 182 | PrintASTNodeSub(GetNodeAt(n, i), depth + 1); 183 | } 184 | fprintf(stderr, "\n"); 185 | PrintPadding(depth); 186 | fprintf(stderr, "]"); 187 | return; 188 | } else if (n->type == kASTStructSpec) { 189 | fprintf(stderr, "StructSpec: "); 190 | PrintASTNodeSub(n->struct_member_dict, depth); 191 | return; 192 | } else if (n->type == kASTKeyValue) { 193 | fprintf(stderr, "%s: ", n->key); 194 | PrintASTNodeSub(n->value, depth); 195 | return; 196 | } else if (n->type == kNodeStructMember) { 197 | fprintf(stderr, "Member +%d: ", n->struct_member_ent_ofs); 198 | PrintASTNodeSub(n->struct_member_ent_type, depth); 199 | return; 200 | } else if (n->type == kNodeMacroReplacement) { 201 | fprintf(stderr, "MacroReplacementarg_expr_list, depth); 203 | fprintf(stderr, ", rep: "); 204 | PrintTokenSequence(n->value); 205 | fprintf(stderr, ">"); 206 | return; 207 | } else if (n->type == kTypeBase) { 208 | PrintTokenStrToFile(n->op, stderr); 209 | return; 210 | } else if (n->type == kTypeLValue) { 211 | fprintf(stderr, "lvalue<"); 212 | PrintASTNodeSub(n->right, depth); 213 | fprintf(stderr, ">"); 214 | return; 215 | } else if (n->type == 
kTypePointer) { 216 | fprintf(stderr, "pointer_of<"); 217 | PrintASTNodeSub(n->right, depth); 218 | fprintf(stderr, ">"); 219 | return; 220 | } else if (n->type == kTypeFunction) { 221 | fprintf(stderr, "functionleft, depth); 223 | fprintf(stderr, ", args: "); 224 | PrintASTNodeSub(n->right, depth); 225 | fprintf(stderr, ">"); 226 | return; 227 | } else if (n->type == kTypeStruct) { 228 | fprintf(stderr, "structtag, depth); 230 | if (!n->type_struct_spec) { 231 | fprintf(stderr, ", incomplete"); 232 | } 233 | fprintf(stderr, ">"); 234 | return; 235 | } else if (n->type == kTypeArray) { 236 | fprintf(stderr, "array_of<"); 237 | PrintASTNodeSub(n->type_array_type_of, depth); 238 | fprintf(stderr, ">["); 239 | PrintASTNodeSub(n->type_array_index_decl, depth); 240 | fprintf(stderr, "]"); 241 | return; 242 | } else if (n->type == kTypeAttrIdent) { 243 | fputc('`', stderr); 244 | PrintTokenStrToFile(n->left, stderr); 245 | fputc('`', stderr); 246 | fprintf(stderr, " has a type: "); 247 | PrintASTNodeSub(n->right, depth); 248 | return; 249 | } else if (n->type == kASTFuncDef) { 250 | fprintf(stderr, "FuncDef "); 251 | PrintASTNodeSub(n->func_name_token, depth); 252 | fprintf(stderr, " : "); 253 | PrintASTNodeSub(n->func_type, depth); 254 | fprintf(stderr, "{\n"); 255 | PrintPadding(depth + 1); 256 | PrintASTNodeSub(n->func_body, depth + 1); 257 | fprintf(stderr, "\n"); 258 | PrintPadding(depth); 259 | fprintf(stderr, "}"); 260 | return; 261 | } else if (n->type == kASTExprStmt) { 262 | PrintASTNodeSub(n->left, depth); 263 | fprintf(stderr, ";"); 264 | return; 265 | } else if (n->type == kASTExprFuncCall) { 266 | fprintf(stderr, "FuncCall<"); 267 | PrintASTNodeSub(n->func_expr, depth); 268 | fprintf(stderr, ">("); 269 | PrintASTNodeSub(n->arg_expr_list, depth); 270 | fprintf(stderr, ")"); 271 | return; 272 | } 273 | fprintf(stderr, "(op="); 274 | if (n->op) PrintTokenBrief(n->op); 275 | if (n->expr_type) { 276 | fprintf(stderr, ":"); 277 | PrintASTNodeSub(n->expr_type, 
depth + 1); 278 | } 279 | if (n->reg) fprintf(stderr, " reg: %d", n->reg); 280 | if (n->cond) { 281 | fprintf(stderr, " cond="); 282 | PrintASTNodeSub(n->cond, depth + 1); 283 | } 284 | if (n->left) { 285 | fprintf(stderr, " L="); 286 | PrintASTNodeSub(n->left, depth + 1); 287 | } 288 | if (n->right) { 289 | fprintf(stderr, " R="); 290 | PrintASTNodeSub(n->right, depth + 1); 291 | } 292 | fprintf(stderr, ")"); 293 | } 294 | 295 | void PrintASTNode(struct Node *n) { 296 | PrintASTNodeSub(n, 0); 297 | fputc('\n', stderr); 298 | } 299 | -------------------------------------------------------------------------------- /compilium.c: -------------------------------------------------------------------------------- 1 | #include "compilium.h" 2 | 3 | const char *symbol_prefix; 4 | const char *include_path; 5 | bool is_preprocess_only = false; 6 | 7 | _Noreturn void Error(const char *fmt, ...) { 8 | fflush(stdout); 9 | fprintf(stderr, "Error: "); 10 | va_list ap; 11 | va_start(ap, fmt); 12 | vfprintf(stderr, fmt, ap); 13 | va_end(ap); 14 | fputc('\n', stderr); 15 | exit(EXIT_FAILURE); 16 | } 17 | 18 | _Noreturn void __assert(const char *expr_str, const char *file, int line) { 19 | Error("Assertion failed: %s at %s:%d\n", expr_str, file, line); 20 | } 21 | 22 | void TestList(void); 23 | void TestType(void); 24 | static struct Node *ParseCompilerArgs(int argc, char **argv) { 25 | // returns replacement_list: ASTList which contains macro replacement 26 | struct Node *replacement_list = AllocList(); 27 | symbol_prefix = "_"; 28 | for (int i = 1; i < argc; i++) { 29 | if (strcmp(argv[i], "--target-os") == 0) { 30 | i++; 31 | if (strcmp(argv[i], "Darwin") == 0) { 32 | symbol_prefix = "_"; 33 | // Define __APPLE__ macro 34 | PushKeyValueToList(replacement_list, "__APPLE__", 35 | CreateMacroReplacement(NULL, NULL)); 36 | } else if (strcmp(argv[i], "Linux") == 0) { 37 | symbol_prefix = ""; 38 | } else { 39 | Error("Unknown os type %s", argv[i]); 40 | } 41 | } else if (strcmp(argv[i], 
"-I") == 0) { 42 | i++; 43 | include_path = argv[i]; 44 | assert(include_path); 45 | if (include_path[strlen(include_path) - 1] != '/') { 46 | Error("Include path (-I ) should be ended with '/'"); 47 | } 48 | fprintf(stderr, "Include path: %s\n", include_path); 49 | } else if (strcmp(argv[i], "--run-unittest=List") == 0) { 50 | TestList(); 51 | } else if (strcmp(argv[i], "--run-unittest=Type") == 0) { 52 | TestType(); 53 | } else if (strcmp(argv[i], "-E") == 0) { 54 | is_preprocess_only = true; 55 | } else { 56 | Error("Unknown argument: %s", argv[i]); 57 | } 58 | } 59 | return replacement_list; 60 | } 61 | 62 | void PrintTokenLine(struct Node *t) { 63 | assert(t); 64 | const char *line_begin = t->begin; 65 | while (t->src_str < line_begin) { 66 | if (line_begin[-1] == '\n') break; 67 | line_begin--; 68 | } 69 | 70 | fprintf(stderr, "Line %d:\n", t->line); 71 | 72 | for (const char *p = line_begin; *p && *p != '\n'; p++) { 73 | fputc(*p <= ' ' ? ' ' : *p, stderr); 74 | } 75 | fputc('\n', stderr); 76 | const char *p; 77 | for (p = line_begin; p < t->begin; p++) { 78 | fputc(' ', stderr); 79 | } 80 | for (int i = 0; i < t->length; i++) { 81 | fputc('^', stderr); 82 | p++; 83 | } 84 | for (; *p && *p != '\n'; p++) { 85 | fputc(' ', stderr); 86 | } 87 | fputc('\n', stderr); 88 | } 89 | 90 | _Noreturn void ErrorWithToken(struct Node *t, const char *fmt, ...) 
{ 91 | PrintTokenLine(t); 92 | 93 | fprintf(stderr, "Error: "); 94 | va_list ap; 95 | va_start(ap, fmt); 96 | vfprintf(stderr, fmt, ap); 97 | va_end(ap); 98 | fputc('\n', stderr); 99 | exit(EXIT_FAILURE); 100 | } 101 | 102 | struct Node *AllocList() { 103 | return AllocNode(kASTList); 104 | } 105 | 106 | void ExpandListSizeIfNeeded(struct Node *list) { 107 | if (list->size < list->capacity) return; 108 | list->capacity = (list->capacity + 1) * 2; 109 | list->nodes = realloc(list->nodes, sizeof(struct Node *) * list->capacity); 110 | assert(list->nodes); 111 | assert(list->size < list->capacity); 112 | } 113 | 114 | void PushToList(struct Node *list, struct Node *node) { 115 | ExpandListSizeIfNeeded(list); 116 | list->nodes[list->size++] = node; 117 | } 118 | 119 | void PushKeyValueToList(struct Node *list, const char *key, 120 | struct Node *value) { 121 | assert(key && value); 122 | ExpandListSizeIfNeeded(list); 123 | list->nodes[list->size++] = CreateASTKeyValue(key, value); 124 | } 125 | 126 | int GetSizeOfList(struct Node *list) { 127 | assert(list && list->type == kASTList); 128 | return list->size; 129 | } 130 | 131 | struct Node *GetNodeAt(struct Node *list, int index) { 132 | assert(list && list->type == kASTList); 133 | assert(0 <= index && index < list->size); 134 | return list->nodes[index]; 135 | } 136 | 137 | struct Node *GetNodeByTokenKey(struct Node *list, struct Node *key) { 138 | assert(list && list->type == kASTList); 139 | for (int i = 0; i < list->size; i++) { 140 | struct Node *n = list->nodes[i]; 141 | if (n->type != kASTKeyValue) continue; 142 | if (IsEqualTokenWithCStr(key, n->key)) return n->value; 143 | } 144 | return NULL; 145 | } 146 | 147 | struct Node *GetNodeByKey(struct Node *list, const char *key) { 148 | assert(list && list->type == kASTList); 149 | for (int i = 0; i < list->size; i++) { 150 | struct Node *n = list->nodes[i]; 151 | if (n->type != kASTKeyValue) continue; 152 | if (strcmp(n->key, key) == 0) return n->value; 153 | } 
154 | return NULL; 155 | } 156 | 157 | void TestList() { 158 | fprintf(stderr, "Testing List..."); 159 | 160 | struct Node *list = AllocList(); 161 | struct Node *item1 = AllocNode(kNodeNone); 162 | struct Node *item2 = AllocNode(kNodeNone); 163 | assert(list); 164 | 165 | PushToList(list, item1); 166 | assert(GetSizeOfList(list) == 1); 167 | PushToList(list, item2); 168 | assert(GetSizeOfList(list) == 2); 169 | 170 | assert(GetNodeAt(list, 0) == item1); 171 | assert(GetNodeAt(list, 1) == item2); 172 | 173 | int base_capacity = list->capacity; 174 | while (GetSizeOfList(list) <= base_capacity) { 175 | PushToList(list, item1); 176 | } 177 | assert(list->capacity > base_capacity); 178 | assert(GetNodeAt(list, 0) == item1); 179 | assert(GetNodeAt(list, 1) == item2); 180 | assert(GetNodeAt(list, GetSizeOfList(list) - 1) == item1); 181 | 182 | PushKeyValueToList(list, "item1", item1); 183 | PushKeyValueToList(list, "item2", item2); 184 | assert(GetNodeByKey(list, "item1") == item1); 185 | assert(GetNodeByKey(list, "item2") == item2); 186 | assert(GetNodeByKey(list, "not_existed") == NULL); 187 | 188 | fprintf(stderr, "PASS\n"); 189 | exit(EXIT_SUCCESS); 190 | } 191 | 192 | // System V AMD64 ABI: 193 | // args: 194 | // RDI, RSI, RDX, RCX, R8, R9 195 | // callee-saved(should be kept on return): 196 | // RBX, RBP, R12, R13, R14, R15 197 | // caller-saved(can be destroyed): 198 | // otherwise 199 | 200 | // Compilium register plan: 201 | // RAX: reserved for return values 202 | // RCX: reserved for shift ops 203 | // RDX: 3rd parameter, reserved for div/mul ops 204 | // RBX: reserved (callee-saved) 205 | // RSP: reserved for stack pointer 206 | // RBP: reserved for frame pointer 207 | // RSI: 2nd parameter 208 | // RDI: 1st parameter 209 | // R8 : 5th parameter 210 | // R9 : 6th parameter 211 | // R10: scratch 212 | // R11: scratch 213 | // R12: reserved (callee-saved) 214 | // R13: reserved (callee-saved) 215 | // R14: reserved (callee-saved) 216 | // R15: reserved 
(callee-saved) 217 | 218 | const char *reg_names_64[NUM_OF_SCRATCH_REGS + 1] = { 219 | // padding 220 | NULL, 221 | // params 222 | "rdi", "rsi", "r8", "r9", 223 | // scratch 224 | "r10", "r11", 225 | // callee-saved 226 | "r12", "r13", "r14", "r15"}; 227 | const char *reg_names_32[NUM_OF_SCRATCH_REGS + 1] = { 228 | // padding 229 | NULL, 230 | // params 231 | "edi", "esi", "r8d", "r9d", 232 | // scratch 233 | "r10d", "r11d", 234 | // callee-saved 235 | "r12d", "r13d", "r14d", "r15d"}; 236 | const char *reg_names_8[NUM_OF_SCRATCH_REGS + 1] = { 237 | // padding 238 | NULL, 239 | // params 240 | "dil", "sil", "r8b", "r9b", 241 | // scratch 242 | "r10b", "r11b", 243 | // callee-saved 244 | "r12b", "r13b", "r14b", "r15b"}; 245 | const char *param_reg_names_64[NUM_OF_PARAM_REGISTERS] = {"rdi", "rsi", "rdx", 246 | "rcx", "r8", "r9"}; 247 | const char *param_reg_names_32[NUM_OF_PARAM_REGISTERS] = {"edi", "esi", "edx", 248 | "ecx", "r8d", "r9d"}; 249 | const char *param_reg_names_8[NUM_OF_PARAM_REGISTERS] = {"dil", "sil", "dl", 250 | "cl", "r8b", "r9b"}; 251 | 252 | #define INITIAL_INPUT_SIZE 8192 253 | const char *ReadFile(FILE *fp) { 254 | int buf_size = INITIAL_INPUT_SIZE; 255 | char *input = malloc(buf_size); 256 | int input_size = 0; 257 | int c; 258 | while ((c = fgetc(fp)) != EOF) { 259 | input[input_size++] = c; 260 | if (input_size == buf_size) { 261 | buf_size <<= 1; 262 | assert((input = realloc(input, buf_size))); 263 | } 264 | } 265 | assert(input_size < buf_size); 266 | input[input_size] = 0; 267 | return input; 268 | } 269 | 270 | int main(int argc, char *argv[]) { 271 | struct Node *replacement_list = ParseCompilerArgs(argc, argv); 272 | const char *input = ReadFile(stdin); 273 | 274 | struct Node *tokens = Tokenize(input); 275 | 276 | fputs("Preprocess begin\n", stderr); 277 | Preprocess(&tokens, replacement_list); 278 | if (is_preprocess_only) { 279 | OutputTokenSequenceAsCSource(tokens); 280 | return 0; 281 | } 282 | 283 | fputs("Parse begin\n", stderr); 
284 | struct Node *ast = Parse(&tokens); 285 | PrintASTNode(ast); 286 | fputc('\n', stderr); 287 | 288 | fputs("Analyze begin\n", stderr); 289 | struct SymbolEntry *ctx = Analyze(ast); 290 | PrintASTNode(ast); 291 | fputc('\n', stderr); 292 | 293 | Generate(ast, ctx); 294 | return 0; 295 | } 296 | -------------------------------------------------------------------------------- /compilium.h: -------------------------------------------------------------------------------- 1 | #include "include/stdarg.h" 2 | #include "include/stdbool.h" 3 | #include "include/stdio.h" 4 | #include "include/stdlib.h" 5 | #include "include/string.h" 6 | 7 | char *strndup(const char *s, size_t n); 8 | char *strdup(const char *s); 9 | 10 | #define assert(expr) \ 11 | ((void)((expr) || (__assert(#expr, __FILE__, __LINE__), 0))) 12 | 13 | enum NodeType { 14 | kNodeNone, 15 | kNodeToken, 16 | kNodeStructMember, 17 | kNodeMacroReplacement, 18 | // 19 | kASTExpr, 20 | kASTExprFuncCall, 21 | kASTList, 22 | kASTExprStmt, 23 | kASTJumpStmt, 24 | kASTSelectionStmt, 25 | kASTIdent, 26 | kASTDirectDecltor, 27 | kASTDecltor, 28 | kASTDecl, 29 | kASTForStmt, 30 | kASTWhileStmt, 31 | kASTFuncDef, 32 | kASTKeyValue, 33 | kASTLocalVar, 34 | kASTStructSpec, 35 | // 36 | kTypeBase, 37 | kTypeLValue, 38 | kTypePointer, 39 | kTypeFunction, 40 | kTypeAttrIdent, 41 | kTypeStruct, 42 | kTypeArray, 43 | }; 44 | 45 | enum TokenType { 46 | kTokenUnknownChar, 47 | kTokenDelimiter, 48 | kTokenZeroWidthNoBreakSpace, 49 | kTokenIntegerConstant, 50 | kTokenIdent, 51 | kTokenKwBreak, 52 | kTokenKwChar, 53 | kTokenKwConst, 54 | kTokenKwContinue, 55 | kTokenKwElse, 56 | kTokenKwExtern, 57 | kTokenKwFor, 58 | kTokenKwIf, 59 | kTokenKwInt, 60 | kTokenKwLong, 61 | kTokenKwReturn, 62 | kTokenKwSizeof, 63 | kTokenKwStatic, 64 | kTokenKwStruct, 65 | kTokenKwTypedef, 66 | kTokenKwUnsigned, 67 | kTokenKwVoid, 68 | kTokenKwWhile, 69 | kTokenCharLiteral, 70 | kTokenStringLiteral, 71 | kTokenPunctuator, 72 | kTokenLineComment, 73 | 
kTokenBlockCommentBegin, 74 | kTokenBlockCommentEnd, 75 | }; 76 | 77 | /* 78 | Node if-stmt: 79 | stmt->cond = cond-expr 80 | stmt->left = true-stmt 81 | stmt->right = false-stmt or null 82 | 83 | Node func-call-expr: 84 | expr->func_expr 85 | expr->arg_expr_list 86 | 87 | Node expr-stmt: 88 | stmt->op = token(;) 89 | stmt->left = node 90 | */ 91 | 92 | struct Node { 93 | enum NodeType type; 94 | int reg; 95 | struct Node *expr_type; 96 | struct Node *op; 97 | struct Node *left; 98 | struct Node *right; 99 | struct Node *init; 100 | struct Node *cond; 101 | struct Node *updt; 102 | struct Node *body; 103 | struct Node *if_true_stmt; 104 | struct Node *if_else_stmt; 105 | struct Node *decltor_init_expr; 106 | struct Node *decltor_init_stmt; 107 | struct Node *struct_member_dict; 108 | struct Node *struct_member_ent_type; 109 | struct Node *struct_member_decl; 110 | int struct_member_ent_ofs; 111 | // for list 112 | int capacity; 113 | int size; 114 | struct Node **nodes; 115 | // for key value 116 | const char *key; 117 | struct Node *value; 118 | // for local var 119 | int byte_offset; 120 | // for string literal 121 | int label_number; 122 | // kASTExprFuncCall 123 | struct Node *func_expr; 124 | struct Node *arg_expr_list; 125 | struct Node *arg_var_list; 126 | int stack_size_needed; 127 | // kASTFuncDef 128 | struct Node *func_body; 129 | struct Node *func_type; 130 | struct Node *func_name_token; 131 | struct Node *tag; 132 | struct Node *type_struct_spec; 133 | struct Node *type_array_type_of; 134 | struct Node *type_array_index_decl; 135 | // kNodeToken 136 | enum TokenType token_type; 137 | struct Node *next_token; 138 | const char *begin; 139 | int length; 140 | const char *src_str; 141 | int line; 142 | }; 143 | 144 | _Noreturn void Error(const char *fmt, ...); 145 | _Noreturn void __assert(const char *expr_str, const char *file, int line); 146 | 147 | void PrintTokenLine(struct Node *t); 148 | _Noreturn void ErrorWithToken(struct Node *t, const char *fmt, 
...); 149 | 150 | void PushToList(struct Node *list, struct Node *node); 151 | void PushKeyValueToList(struct Node *list, const char *key, struct Node *value); 152 | 153 | struct Node *AllocList(); 154 | int GetSizeOfList(struct Node *list); 155 | struct Node *GetNodeAt(struct Node *list, int index); 156 | struct Node *GetNodeByTokenKey(struct Node *list, struct Node *key); 157 | 158 | extern const char *symbol_prefix; 159 | extern const char *include_path; 160 | 161 | #define NUM_OF_SCRATCH_REGS 10 162 | extern const char *reg_names_64[NUM_OF_SCRATCH_REGS + 1]; 163 | extern const char *reg_names_32[NUM_OF_SCRATCH_REGS + 1]; 164 | extern const char *reg_names_8[NUM_OF_SCRATCH_REGS + 1]; 165 | 166 | #define NUM_OF_PARAM_REGISTERS 6 167 | extern const char *param_reg_names_64[NUM_OF_PARAM_REGISTERS]; 168 | extern const char *param_reg_names_32[NUM_OF_PARAM_REGISTERS]; 169 | extern const char *param_reg_names_8[NUM_OF_PARAM_REGISTERS]; 170 | 171 | // @analyzer.c 172 | struct SymbolEntry *Analyze(struct Node *node); 173 | 174 | // @ast.c 175 | bool IsToken(struct Node *n); 176 | bool IsTokenWithType(struct Node *n, enum TokenType type); 177 | bool IsASTList(struct Node *); 178 | bool IsASTDeclOfTypedef(struct Node *n); 179 | bool IsASTDeclOfExtern(struct Node *n); 180 | struct Node *AllocNode(enum NodeType type); 181 | struct Node *CreateASTBinOp(struct Node *t, struct Node *left, 182 | struct Node *right); 183 | struct Node *CreateASTUnaryPrefixOp(struct Node *t, struct Node *right); 184 | struct Node *CreateASTUnaryPostfixOp(struct Node *left, struct Node *t); 185 | struct Node *CreateASTExprStmt(struct Node *t, struct Node *left); 186 | struct Node *CreateASTFuncDef(struct Node *func_decl, struct Node *func_body); 187 | 188 | struct Node *CreateASTKeyValue(const char *key, struct Node *value); 189 | 190 | struct Node *CreateASTLocalVar(int byte_offset, struct Node *var_type); 191 | 192 | struct Node *CreateTypeBase(struct Node *t); 193 | 194 | struct Node 
*CreateTypeLValue(struct Node *type); 195 | 196 | struct Node *CreateTypePointer(struct Node *type); 197 | struct Node *CreateTypeFunction(struct Node *return_type, 198 | struct Node *arg_type_list); 199 | struct Node *GetReturnTypeOfFunction(struct Node *); 200 | struct Node *GetArgTypeList(struct Node *func_type); 201 | struct Node *CreateTypeStruct(struct Node *tag_token, struct Node *struct_spec); 202 | struct Node *CreateTypeAttrIdent(struct Node *ident_token, struct Node *type); 203 | struct Node *CreateASTIdent(struct Node *ident); 204 | struct Node *CreateTypeArray(struct Node *type_of, struct Node *index_decl); 205 | struct Node *CreateMacroReplacement(struct Node *args_tokens, 206 | struct Node *to_tokens); 207 | void PrintASTNode(struct Node *n); 208 | 209 | // @compilium.c 210 | const char *ReadFile(FILE *fp); 211 | 212 | // @generate.c 213 | void Generate(struct Node *ast, struct SymbolEntry *); 214 | 215 | // @parser.c 216 | extern struct Node *toplevel_names; 217 | void InitParser(struct Node **); 218 | struct Node *Parse(struct Node **passed_tokens); 219 | 220 | // @preprocessor.c 221 | void Preprocess(struct Node **head_holder, struct Node *replacement_list); 222 | 223 | // @struct.c 224 | struct SymbolEntry; 225 | int CalcStructSize(struct Node *spec); 226 | int CalcStructAlign(struct Node *spec); 227 | void AddMemberOfStructFromDecl(struct Node *struct_spec, struct Node *decl); 228 | struct Node *FindStructMember(struct Node *struct_type, struct Node *key_token); 229 | void ResolveTypesOfMembersOfStruct(struct SymbolEntry *ctx, struct Node *spec); 230 | 231 | // @symbol.c 232 | enum SymbolType { 233 | kSymbolLocalVar, 234 | kSymbolGlobalVar, 235 | kSymbolExternVar, 236 | kSymbolFuncDef, 237 | kSymbolFuncDeclType, 238 | kSymbolStructType, 239 | }; 240 | struct SymbolEntry { 241 | enum SymbolType type; 242 | struct SymbolEntry *prev; 243 | const char *key; 244 | struct Node *value; 245 | }; 246 | int GetLastLocalVarOffset(struct SymbolEntry *); 247 
| struct Node *AddLocalVar(struct SymbolEntry **ctx, const char *key, 248 | struct Node *var_type); 249 | void AddExternVar(struct SymbolEntry **ctx, const char *key, 250 | struct Node *var_type); 251 | void AddGlobalVar(struct SymbolEntry **ctx, const char *key, 252 | struct Node *var_type); 253 | struct Node *FindExternVar(struct SymbolEntry *e, struct Node *key_token); 254 | struct Node *FindGlobalVar(struct SymbolEntry *e, struct Node *key_token); 255 | struct Node *FindLocalVar(struct SymbolEntry *e, struct Node *key_token); 256 | void AddFuncDef(struct SymbolEntry **ctx, const char *key, 257 | struct Node *func_def); 258 | struct Node *FindFuncDef(struct SymbolEntry *e, struct Node *key_token); 259 | void AddFuncDeclType(struct SymbolEntry **ctx, const char *key, 260 | struct Node *func_decl); 261 | struct Node *FindFuncDeclType(struct SymbolEntry *e, struct Node *key_token); 262 | void AddStructType(struct SymbolEntry **, const char *, struct Node *); 263 | struct Node *FindStructType(struct SymbolEntry *, struct Node *); 264 | 265 | // @token.c 266 | bool IsToken(struct Node *n); 267 | struct Node *AllocToken(const char *src_str, int line, const char *begin, 268 | int length, enum TokenType type); 269 | struct Node *DuplicateToken(struct Node *base_token); 270 | struct Node *DuplicateTokenSequence(struct Node *base_head); 271 | char *CreateTokenStr(struct Node *t); 272 | int IsEqualTokenWithCStr(struct Node *t, const char *s); 273 | void PrintTokenSequence(struct Node *t); 274 | void OutputTokenSequenceAsCSource(struct Node *t); 275 | void PrintToken(struct Node *t); 276 | void PrintTokenBrief(struct Node *t); 277 | void PrintTokenStrToFile(struct Node *t, FILE *fp); 278 | 279 | void InitTokenStream(struct Node **head_token); 280 | struct Node *PeekToken(void); 281 | struct Node *ReadToken(enum TokenType type); 282 | struct Node *ConsumeToken(enum TokenType type); 283 | struct Node *ConsumeTokenStr(const char *s); 284 | struct Node *ExpectTokenStr(const 
char *s); 285 | struct Node *ConsumePunctuator(const char *s); 286 | struct Node *ExpectPunctuator(const char *s); 287 | struct Node *NextToken(void); 288 | void RemoveCurrentToken(void); 289 | void RemoveTokensTo(struct Node *end); 290 | void InsertTokens(struct Node *); 291 | void InsertTokensWithIdentReplace(struct Node *seq, struct Node *rep_list); 292 | struct Node **RemoveDelimiterTokens(struct Node **); 293 | 294 | // @tokenizer.c 295 | struct Node *CreateToken(const char *input); 296 | struct Node *Tokenize(const char *input); 297 | 298 | // @type.c 299 | int IsSameTypeExceptAttr(struct Node *a, struct Node *b); 300 | int IsLValueType(struct Node *t); 301 | struct Node *GetTypeWithoutAttr(struct Node *t); 302 | struct Node *GetIdentifierTokenFromTypeAttr(struct Node *t); 303 | struct Node *GetRValueType(struct Node *t); 304 | int GetSizeOfType(struct Node *t); 305 | int GetAlignOfType(struct Node *t); 306 | struct Node *CreateTypeInContext(struct SymbolEntry *ctx, 307 | struct Node *decl_spec, struct Node *decltor); 308 | struct Node *CreateType(struct Node *decl_spec, struct Node *decltor); 309 | struct Node *CreateTypeFromDecl(struct Node *decl); 310 | struct Node *CreateTypeFromDeclInContext(struct SymbolEntry *ctx, 311 | struct Node *decl); 312 | -------------------------------------------------------------------------------- /examples/.gitignore: -------------------------------------------------------------------------------- 1 | *.S 2 | *.bin 3 | ctests 4 | -------------------------------------------------------------------------------- /examples/Makefile: -------------------------------------------------------------------------------- 1 | default : calc.bin ctests.bin gameoflife.bin hello.bin pi.bin fib.bin 2 | 3 | .FORCE : 4 | 5 | run_ctests : ctests.bin ../compilium 6 | $(MAKE) run_ctests_hostcc 7 | ./ctests.bin 8 | 9 | run_ctests_hostcc : ctests.host.bin 10 | ./ctests.host.bin 11 | 12 | validate : run_ctests_hostcc 13 | 14 | ../compilium : .FORCE 15 
| $(MAKE) -C .. compilium 16 | 17 | ../compilium_dbg : .FORCE 18 | $(MAKE) -C .. compilium_dbg 19 | 20 | constsum.c : gen_constsum.js 21 | node gen_constsum.js > $@ 22 | 23 | %.host.bin : %.c Makefile 24 | $(CC) -o $*.host.bin $*.c 25 | 26 | %.host_o3.bin : %.c Makefile 27 | $(CC) -O3 -o $*.host_o3.bin $*.c 28 | 29 | LLDB_ARGS = -o 'settings set interpreter.prompt-on-quit false' \ 30 | -o 'b __assert' \ 31 | -o 'b ErrorWithToken' \ 32 | -o 'process launch' 33 | 34 | debug_% : ../compilium_dbg %.c 35 | lldb \ 36 | -o 'settings set target.input-path $*.c' $(LLDB_ARGS) \ 37 | -- ../compilium_dbg --target-os `uname` 38 | 39 | %.host.S : %.c Makefile ../compilium .FORCE 40 | $(CC) -S -o $@ $*.c 41 | 42 | %.S : %.c Makefile ../compilium .FORCE 43 | ../compilium --target-os `uname` -I ../include/ < $*.c > $*.S 44 | 45 | format: 46 | clang-format -i *.c 47 | 48 | %.bin : %.S Makefile 49 | $(CC) -o $@ $*.S 50 | 51 | clean: 52 | $(RM) *.bin 53 | $(RM) *.S 54 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | rlwrap ./jsondump.bin 3 | ``` 4 | -------------------------------------------------------------------------------- /examples/calc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define MAX_INPUT_SIZE 128 5 | 6 | void assert(int v) { 7 | if (v) return; 8 | printf("Assertion failed!"); 9 | } 10 | 11 | #define INITIAL_INPUT_SIZE 8192 12 | static const char *ReadLine(void) { 13 | int buf_size = INITIAL_INPUT_SIZE; 14 | char *input = NULL; 15 | int input_size = 0; 16 | int c; 17 | input = malloc(buf_size); 18 | while ((c = getchar()) != EOF && c != '\n') { 19 | input[input_size++] = c; 20 | if (input_size == buf_size) { 21 | buf_size <<= 1; 22 | assert((input = realloc(input, buf_size))); 23 | } 24 | } 25 | assert(input_size < buf_size); 26 | input[input_size] = 0; 27 | 
return input; 28 | } 29 | 30 | struct Token { 31 | const char *begin; 32 | int length; 33 | const char *src_str; 34 | int line; 35 | struct Token *next_token; 36 | }; 37 | 38 | struct Token *AllocToken(const char *src_str, int line, const char *begin, 39 | int length) { 40 | struct Token *t; 41 | t = malloc(sizeof(*t)); 42 | t->begin = begin; 43 | t->length = length; 44 | t->src_str = src_str; 45 | t->line = line; 46 | t->next_token = NULL; 47 | return t; 48 | } 49 | 50 | static struct Token *CreateNextToken(const char *p, const char *src, 51 | int *line) { 52 | assert(line); 53 | if (!*p) return NULL; 54 | if (*p == ' ') { 55 | return AllocToken(src, *line, p, 1); 56 | } 57 | if (*p == '\n') { 58 | (*line)++; 59 | return AllocToken(src, *line, p, 1); 60 | } 61 | if (p[0] == '\\' && p[1] == '\n') { 62 | (*line)++; 63 | return AllocToken(src, *line, p, 2); 64 | } 65 | if ('1' <= *p && *p <= '9') { 66 | int length = 0; 67 | while ('0' <= p[length] && p[length] <= '9') { 68 | length++; 69 | } 70 | return AllocToken(src, *line, p, length); 71 | } else if ('0' == *p) { 72 | int length = 0; 73 | if (p[1] == 'x') { 74 | // Hexadecimal 75 | length += 2; 76 | while (('0' <= p[length] && p[length] <= '9') || 77 | ('A' <= p[length] && p[length] <= 'F') || 78 | ('a' <= p[length] && p[length] <= 'f')) { 79 | length++; 80 | } 81 | } else { 82 | // Octal 83 | while ('0' <= p[length] && p[length] <= '7') { 84 | length++; 85 | } 86 | } 87 | return AllocToken(src, *line, p, length); 88 | } else if (('A' <= *p && *p <= 'Z') || ('a' <= *p && *p <= 'z') || 89 | *p == '_') { 90 | int length = 0; 91 | while (('A' <= p[length] && p[length] <= 'Z') || 92 | ('a' <= p[length] && p[length] <= 'z') || p[length] == '_' || 93 | ('0' <= p[length] && p[length] <= '9')) { 94 | length++; 95 | } 96 | struct Token *t = AllocToken(src, *line, p, length); 97 | return t; 98 | } 99 | printf("Unexpected char %c\n", *p); 100 | exit(1); 101 | } 102 | 103 | struct Token *Tokenize(const char *input) { 104 | // 
returns head of tokens. 105 | struct Token *token_head = NULL; 106 | struct Token **last_next_token = &token_head; 107 | const char *p = input; 108 | struct Token *t; 109 | int line = 1; 110 | while ((t = CreateNextToken(p, input, &line))) { 111 | *last_next_token = t; 112 | last_next_token = &t->next_token; 113 | p = t->begin + t->length; 114 | } 115 | return token_head; 116 | } 117 | 118 | int main() { 119 | fputs("Hello stderr!\n", stderr); 120 | const char *input = ReadLine(); 121 | struct Token *tokens = Tokenize(input); 122 | for (struct Token *t = tokens; t; t = t->next_token) { 123 | putchar('<'); 124 | printf("%.*s", t->length, t->begin); 125 | putchar('>'); 126 | } 127 | putchar('\n'); 128 | return 0; 129 | } 130 | -------------------------------------------------------------------------------- /examples/collatz.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | static int collatz(int n) { 4 | // printf("%d->", n); 5 | if (n == 1) return 1; 6 | if (n & 1) { 7 | n = 3 * n + 1; 8 | } else { 9 | n /= 2; 10 | } 11 | return collatz(n) + 1; 12 | } 13 | 14 | int main() { 15 | for (int k = 0; k < 10; k++) { 16 | for (int i = 1; i < 100000; i++) { 17 | printf("%d: ", i); 18 | int cycle = collatz(i); 19 | printf("OK! 
cycle = %d\n", cycle); 20 | } 21 | } 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /examples/ctests.c: -------------------------------------------------------------------------------- 1 | // This is a first line comment 2 | // This is a second line comment 3 | // This is a third line comment 4 | /* This is a block comment */ 5 | /* 6 | This is a block comment 1 7 | */ 8 | /* 9 | This is a block comment 2 10 | */ 11 | /* 12 | This is a block comment 3 13 | */ 14 | int puts(char*); 15 | int printf(char*, int); 16 | void exit(int); 17 | 18 | void ExpectEq(int actual, int expected, int line) { 19 | printf("Line %3d: ", line); 20 | if (actual != expected) { 21 | puts("FAIL"); 22 | exit(1); 23 | } 24 | puts("PASS"); 25 | } 26 | 27 | int TestIfStmtTrueCase() { 28 | if (1) return 3; 29 | return 5; 30 | } 31 | 32 | int TestIfStmtFalseCase() { 33 | if (0) return 3; 34 | return 5; 35 | } 36 | 37 | int TestIfStmtElseCase() { 38 | if (0) { 39 | return 2; 40 | } else { 41 | return 3; // expected 42 | } 43 | return 5; 44 | } 45 | 46 | int TestIfStmtNestedElseCase(int v) { 47 | int ans; 48 | ans = 1; 49 | if (v & 1) 50 | ans = ans * 2; 51 | else if (v & 2) 52 | ans = ans * 3; 53 | else if (v & 4) 54 | ans = ans * 5; 55 | ans = ans * 7; 56 | return ans; 57 | } 58 | 59 | int TestIfStmtWithCompStmt() { 60 | if (1) { 61 | return 3; 62 | } 63 | return 5; 64 | } 65 | 66 | int TestInitializer() { 67 | int v0 = 3; 68 | int v1 = v0 * 5 + 7; 69 | return v0 + v1; // 25 70 | } 71 | 72 | int TestCompAssignPlusEq(int vL, int vR) { 73 | int v = vL; 74 | vL += vR; 75 | return vL; 76 | } 77 | 78 | int TestCompAssignMinusEq(int vL, int vR) { 79 | int v = vL; 80 | vL -= vR; 81 | return vL; 82 | } 83 | 84 | int TestCompAssignMulEq(int vL, int vR) { 85 | int v = vL; 86 | vL *= vR; 87 | return vL; 88 | } 89 | 90 | int TestCompAssignDivEq(int vL, int vR) { 91 | int v = vL; 92 | vL /= vR; 93 | return vL; 94 | } 95 | 96 | int 
TestCompAssignModEq(int vL, int vR) { 97 | int v = vL; 98 | vL %= vR; 99 | return vL; 100 | } 101 | 102 | int TestCompAssignLShift(int vL, int vR) { 103 | int v = vL; 104 | vL <<= vR; 105 | return vL; 106 | } 107 | 108 | int TestCompAssignRShift(int vL, int vR) { 109 | int v = vL; 110 | vL >>= vR; 111 | return vL; 112 | } 113 | 114 | struct Point3D { 115 | int x; 116 | int y; 117 | int z; 118 | }; 119 | 120 | struct Point2D { 121 | int x; 122 | int y; 123 | }; 124 | 125 | void TestSizeof() { 126 | char c; 127 | ExpectEq(sizeof(c), 1, __LINE__); 128 | int a; 129 | ExpectEq(sizeof(a), 4, __LINE__); 130 | int* p; 131 | ExpectEq(sizeof(p), 8, __LINE__); 132 | struct IncompleteStruct* incomplete_struct; 133 | ExpectEq(sizeof(incomplete_struct), 8, __LINE__); 134 | struct Point2D p2d; 135 | ExpectEq(sizeof(p2d), 8, __LINE__); 136 | struct Point3D p3d; 137 | ExpectEq(sizeof(p3d), 12, __LINE__); 138 | char s[2 + 3 + 5]; 139 | ExpectEq(sizeof(s), 10, __LINE__); 140 | } 141 | 142 | struct Line2D { 143 | struct Point2D p; 144 | struct Point2D q; 145 | }; 146 | 147 | void TestStructVecSum(int x0, int y0, int x1, int y1, int x_expected, 148 | int y_expected) { 149 | struct Point2D v0; 150 | struct Point2D v1; 151 | v0.x = x0; 152 | v0.y = y0; 153 | v1.x = x1; 154 | v1.y = y1; 155 | ExpectEq(v0.x + v1.x, x_expected, __LINE__); 156 | ExpectEq(v0.y + v1.y, y_expected, __LINE__); 157 | } 158 | 159 | void TestStructVecSumRef(int x0, int y0, int x1, int y1, int x_expected, 160 | int y_expected) { 161 | struct Point2D v0; 162 | struct Point2D v1; 163 | struct Point2D* vp0 = &v0; 164 | struct Point2D* vp1 = &v1; 165 | struct Point2D** vpp0 = &vp0; 166 | struct Point2D** vpp1 = &vp1; 167 | v0.x = x0; 168 | v0.y = y0; 169 | vp1->x = x1; 170 | (*vpp1)->y = y1; 171 | ExpectEq((*vpp0)->x + v1.x, x_expected, __LINE__); 172 | ExpectEq(vp0->y + v1.y, y_expected, __LINE__); 173 | } 174 | 175 | int TestArray(int v0, int v1, int v2, int idx) { 176 | int a[3]; 177 | a[0] = v0; 178 | a[1] = v1; 179 
| a[2] = v2; 180 | return a[idx]; 181 | } 182 | 183 | int TestWhileLoop(int idx_begin, int idx_end, int idx_inc, int value_inc) { 184 | int v = 0; 185 | int i = idx_begin; 186 | while (i < idx_end) { 187 | v += value_inc; 188 | i += idx_inc; 189 | } 190 | return v; 191 | } 192 | 193 | int TestForLoop(int idx_begin, int idx_end, int idx_inc, int value_inc) { 194 | int v = 0; 195 | for (int i = idx_begin; i < idx_end; i += idx_inc) { 196 | v += value_inc; 197 | } 198 | return v; 199 | } 200 | 201 | void TestInc() { 202 | int v = 2; 203 | ExpectEq(v++, 2, __LINE__); 204 | ExpectEq(v, 3, __LINE__); 205 | ExpectEq(v++, 3, __LINE__); 206 | ExpectEq(v, 4, __LINE__); 207 | v = 2; 208 | ExpectEq(v, 2, __LINE__); 209 | ExpectEq(++v, 3, __LINE__); 210 | ExpectEq(v, 3, __LINE__); 211 | ExpectEq(++v, 4, __LINE__); 212 | } 213 | 214 | void TestDec() { 215 | int v = 2; 216 | ExpectEq(v--, 2, __LINE__); 217 | ExpectEq(v, 1, __LINE__); 218 | ExpectEq(v--, 1, __LINE__); 219 | ExpectEq(v, 0, __LINE__); 220 | v = 2; 221 | ExpectEq(v, 2, __LINE__); 222 | ExpectEq(--v, 1, __LINE__); 223 | ExpectEq(v, 1, __LINE__); 224 | ExpectEq(--v, 0, __LINE__); 225 | } 226 | 227 | int TestCharLiteralAccess() { 228 | ExpectEq(*"compilium", 'c', __LINE__); 229 | ExpectEq(*("compilium" + 0), 'c', __LINE__); 230 | ExpectEq(*("compilium" + 1), 'o', __LINE__); 231 | ExpectEq(*("compilium" + 2), 'm', __LINE__); 232 | ExpectEq(*("compilium" + 3), 'p', __LINE__); 233 | ExpectEq(*("compilium" + 4), 'i', __LINE__); 234 | ExpectEq(*("compilium" + 5), 'l', __LINE__); 235 | ExpectEq(*("compilium" + 6), 'i', __LINE__); 236 | ExpectEq(*("compilium" + 7), 'u', __LINE__); 237 | ExpectEq(*("compilium" + 8), 'm', __LINE__); 238 | ExpectEq(*("compilium" + 9), 0, __LINE__); 239 | } 240 | 241 | void TestReassign() { 242 | char c = 2; 243 | c = c + 1; 244 | ExpectEq(c, 3, __LINE__); 245 | c = 0; 246 | ExpectEq(c, 0, __LINE__); 247 | c = 2; 248 | ExpectEq(c, 2, __LINE__); 249 | c = 0; 250 | ExpectEq(c, 0, __LINE__); 251 | c 
= 2 + 3; 252 | ExpectEq(c, 5, __LINE__); 253 | ExpectEq(c + 2, 7, __LINE__); 254 | } 255 | 256 | int UnreachableReturn() { 257 | ; 258 | return 2; 259 | return 0; 260 | } 261 | 262 | void TestPtrOfVar() { 263 | int a; 264 | int* p; 265 | a = 1; 266 | p = &a; 267 | *p = 5; 268 | ExpectEq(a, 5, __LINE__); 269 | a = 1; 270 | p = &a; 271 | a = 3; 272 | ExpectEq(*p, 3, __LINE__); 273 | p = &a; 274 | ExpectEq(p ? 1 : 0, 1, __LINE__); 275 | } 276 | 277 | void TestConstTypeSpec() { const int a = 0; } 278 | 279 | void TestBreak() { 280 | int v; 281 | v = 1; 282 | for (;;) { 283 | for (;;) { 284 | v *= 2; 285 | break; 286 | } 287 | v *= 3; 288 | break; 289 | } 290 | ExpectEq(v, 6, __LINE__); 291 | 292 | v = 1; 293 | while (1) { 294 | while (1) { 295 | v *= 3; 296 | break; 297 | } 298 | v *= 5; 299 | break; 300 | } 301 | ExpectEq(v, 15, __LINE__); 302 | } 303 | 304 | void TestShortCircuitEval() { 305 | int v = 1; 306 | v++ && 0 && v++; 307 | ExpectEq(v, 2, __LINE__); 308 | v = 1; 309 | 1 || v++; 310 | ExpectEq(v, 1, __LINE__); 311 | } 312 | 313 | int main(int argc, char** argv) { 314 | TestShortCircuitEval(); 315 | TestBreak(); 316 | TestConstTypeSpec(); 317 | TestPtrOfVar(); 318 | TestReassign(); 319 | TestCharLiteralAccess(); 320 | TestInc(); 321 | 322 | ExpectEq('C', 67, __LINE__); 323 | 324 | ExpectEq(UnreachableReturn(), 2, __LINE__); 325 | 326 | ExpectEq(+0, 0, __LINE__); 327 | ExpectEq(1 - -2, 3, __LINE__); 328 | 329 | ExpectEq(0, 0, __LINE__); 330 | ExpectEq(1, 1, __LINE__); 331 | 332 | ExpectEq(0xF, 15, __LINE__); 333 | ExpectEq(0xFF, 255, __LINE__); 334 | ExpectEq(0x010, 16, __LINE__); 335 | ExpectEq(0x100, 256, __LINE__); 336 | 337 | ExpectEq(17, 17, __LINE__); 338 | ExpectEq(017, 15, __LINE__); 339 | 340 | ExpectEq((0), 0, __LINE__); 341 | ExpectEq((1), 1, __LINE__); 342 | ExpectEq((1 ? 2 : 3), 2, __LINE__); 343 | ExpectEq((0 ? 
1 : 2), 2, __LINE__); 344 | ExpectEq(2 * (3 + 4), 14, __LINE__); 345 | 346 | ExpectEq(~10 & 15, 5, __LINE__); 347 | ExpectEq(~5 & 15, 10, __LINE__); 348 | 349 | ExpectEq(!0, 1, __LINE__); 350 | ExpectEq(!1, 0, __LINE__); 351 | ExpectEq(!2, 0, __LINE__); 352 | 353 | ExpectEq(3 * 4, 12, __LINE__); 354 | ExpectEq(3 * 4 * 5, 60, __LINE__); 355 | ExpectEq(365 / 7, 52, __LINE__); 356 | ExpectEq(365 / 7 / 8, 6, __LINE__); 357 | ExpectEq(365 % 7, 1, __LINE__); 358 | ExpectEq(365 % 7 % 8, 1, __LINE__); 359 | 360 | ExpectEq(100 + 7, 107, __LINE__); 361 | ExpectEq(3 + 5 + 7 + 9, 24, __LINE__); 362 | ExpectEq(100 - 7, 93, __LINE__); 363 | ExpectEq(1 + 3 - 5 - 7 + 9, 1, __LINE__); 364 | 365 | ExpectEq(3 << 2, 12, __LINE__); 366 | ExpectEq(3 >> 2, 0, __LINE__); 367 | ExpectEq(17 >> 2, 4, __LINE__); 368 | ExpectEq(17 >> 2 >> 1, 2, __LINE__); 369 | 370 | ExpectEq(3 < 5, 1, __LINE__); 371 | ExpectEq(5 < 3, 0, __LINE__); 372 | ExpectEq(7 < 7, 0, __LINE__); 373 | 374 | ExpectEq(3 <= 5, 1, __LINE__); 375 | ExpectEq(5 <= 3, 0, __LINE__); 376 | ExpectEq(7 <= 7, 1, __LINE__); 377 | 378 | ExpectEq(3 > 5, 0, __LINE__); 379 | ExpectEq(5 > 3, 1, __LINE__); 380 | ExpectEq(7 > 7, 0, __LINE__); 381 | 382 | ExpectEq(3 >= 5, 0, __LINE__); 383 | ExpectEq(5 >= 3, 1, __LINE__); 384 | ExpectEq(7 >= 7, 1, __LINE__); 385 | 386 | ExpectEq(3 == 5, 0, __LINE__); 387 | ExpectEq(5 == 3, 0, __LINE__); 388 | ExpectEq(7 == 7, 1, __LINE__); 389 | 390 | ExpectEq(3 != 5, 1, __LINE__); 391 | ExpectEq(5 != 3, 1, __LINE__); 392 | ExpectEq(7 != 7, 0, __LINE__); 393 | 394 | ExpectEq(0 & 0, 0, __LINE__); 395 | ExpectEq(0 & 1, 0, __LINE__); 396 | ExpectEq(1 & 0, 0, __LINE__); 397 | ExpectEq(1 & 1, 1, __LINE__); 398 | 399 | ExpectEq(10 & 6, 2, __LINE__); 400 | 401 | ExpectEq(0 ^ 0, 0, __LINE__); 402 | ExpectEq(0 ^ 1, 1, __LINE__); 403 | ExpectEq(1 ^ 0, 1, __LINE__); 404 | ExpectEq(1 ^ 1, 0, __LINE__); 405 | 406 | ExpectEq(10 ^ 6, 12, __LINE__); 407 | 408 | ExpectEq(0 | 0, 0, __LINE__); 409 | ExpectEq(0 | 1, 1, __LINE__); 
410 | ExpectEq(1 | 0, 1, __LINE__); 411 | ExpectEq(1 | 1, 1, __LINE__); 412 | 413 | ExpectEq(10 | 6, 14, __LINE__); 414 | 415 | ExpectEq(0 && 0, 0, __LINE__); 416 | ExpectEq(0 && 1, 0, __LINE__); 417 | ExpectEq(1 && 0, 0, __LINE__); 418 | ExpectEq(1 && 1, 1, __LINE__); 419 | 420 | ExpectEq(10 && 6, 1, __LINE__); 421 | 422 | ExpectEq(0 || 0, 0, __LINE__); 423 | ExpectEq(0 || 1, 1, __LINE__); 424 | ExpectEq(1 || 0, 1, __LINE__); 425 | ExpectEq(1 || 1, 1, __LINE__); 426 | 427 | ExpectEq(10 || 6, 1, __LINE__); 428 | 429 | ExpectEq(0 && 0 || 0, 0, __LINE__); 430 | ExpectEq(0 && 0 || 1, 1, __LINE__); 431 | ExpectEq(0 && 1 || 0, 0, __LINE__); 432 | ExpectEq(0 && 1 || 1, 1, __LINE__); 433 | ExpectEq(1 && 0 || 0, 0, __LINE__); 434 | ExpectEq(1 && 0 || 1, 1, __LINE__); 435 | ExpectEq(1 && 1 || 0, 1, __LINE__); 436 | ExpectEq(1 && 1 || 1, 1, __LINE__); 437 | 438 | ExpectEq(0 || 0 && 0, 0, __LINE__); 439 | ExpectEq(0 || 0 && 1, 0, __LINE__); 440 | ExpectEq(0 || 1 && 0, 0, __LINE__); 441 | ExpectEq(0 || 1 && 1, 1, __LINE__); 442 | ExpectEq(1 || 0 && 0, 1, __LINE__); 443 | ExpectEq(1 || 0 && 1, 1, __LINE__); 444 | ExpectEq(1 || 1 && 0, 1, __LINE__); 445 | ExpectEq(1 || 1 && 1, 1, __LINE__); 446 | 447 | ExpectEq(0 ? 3 : 5, 5, __LINE__); 448 | ExpectEq(1 ? 3 : 5, 3, __LINE__); 449 | ExpectEq(2 ? 
3 : 5, 3, __LINE__); 450 | 451 | ExpectEq((2, 3), 3, __LINE__); 452 | ExpectEq((2 * 3, 5 + 7), 12, __LINE__); 453 | 454 | ExpectEq(3 * 4 + 5, 17, __LINE__); 455 | ExpectEq(3 + 4 * 5, 23, __LINE__); 456 | ExpectEq(3 + 4 * 5 - 9, 14, __LINE__); 457 | ExpectEq(3 + 14 / 2, 10, __LINE__); 458 | ExpectEq(12 + 17 % 7, 15, __LINE__); 459 | ExpectEq(1 + 2 << 3, 24, __LINE__); 460 | ExpectEq(-3 * -4 + -5, 7, __LINE__); 461 | 462 | ExpectEq(TestIfStmtTrueCase(), 3, __LINE__); 463 | ExpectEq(TestIfStmtFalseCase(), 5, __LINE__); 464 | ExpectEq(TestIfStmtWithCompStmt(), 3, __LINE__); 465 | ExpectEq(TestIfStmtElseCase(), 3, __LINE__); 466 | 467 | ExpectEq(TestIfStmtNestedElseCase(0), 7, __LINE__); 468 | ExpectEq(TestIfStmtNestedElseCase(1), 14, __LINE__); 469 | ExpectEq(TestIfStmtNestedElseCase(2), 21, __LINE__); 470 | ExpectEq(TestIfStmtNestedElseCase(3), 14, __LINE__); 471 | ExpectEq(TestIfStmtNestedElseCase(4), 35, __LINE__); 472 | ExpectEq(TestIfStmtNestedElseCase(5), 14, __LINE__); 473 | ExpectEq(TestIfStmtNestedElseCase(6), 21, __LINE__); 474 | ExpectEq(TestIfStmtNestedElseCase(7), 14, __LINE__); 475 | 476 | ExpectEq(TestInitializer(), 25, __LINE__); 477 | 478 | ExpectEq(TestCompAssignPlusEq(3, 5), 8, __LINE__); 479 | ExpectEq(TestCompAssignPlusEq(-5, 5), 0, __LINE__); 480 | 481 | ExpectEq(TestCompAssignMinusEq(3, 5), -2, __LINE__); 482 | ExpectEq(TestCompAssignMinusEq(-5, 5), -10, __LINE__); 483 | 484 | ExpectEq(TestCompAssignMulEq(3, 0), 0, __LINE__); 485 | ExpectEq(TestCompAssignMulEq(0, 5), 0, __LINE__); 486 | ExpectEq(TestCompAssignMulEq(3, 5), 15, __LINE__); 487 | ExpectEq(TestCompAssignMulEq(5, 3), 15, __LINE__); 488 | 489 | ExpectEq(TestCompAssignDivEq(8, 2), 4, __LINE__); 490 | ExpectEq(TestCompAssignDivEq(13, 5), 2, __LINE__); 491 | 492 | ExpectEq(TestCompAssignModEq(8, 2), 0, __LINE__); 493 | ExpectEq(TestCompAssignModEq(13, 5), 3, __LINE__); 494 | 495 | ExpectEq(TestCompAssignModEq(13, 5), 3, __LINE__); 496 | 497 | TestSizeof(); 498 | 499 | TestStructVecSum(1, 
// Returns the n-th Fibonacci number (https://oeis.org/A000045) using plain
// double recursion. Any n that is not greater than 1 is returned unchanged.
int fib(int n) {
  // assume n >= 0
  if (n > 1) {
    return fib(n - 1) + fib(n - 2);
  }
  return n;
}
void putchar(char c);
void usleep(int us);

// Conway's Game of Life on a 32x32 grid, printed forever.
//
// Each cell of `map` uses two bits: bit 0 is the current generation and bit 1
// is the next generation. Coordinates are wrapped with `& mask`, so the grid
// has no edges. This function never returns.
int main() {
  int x;
  int y;
  int p;
  int q;
  int col;
  int count;
  int map[32][32];
  int size = 32;
  int mask = size - 1;  // size is a power of two, so `& mask` wraps an index

  // Start from an empty grid.
  for (y = 0; y < size; y++) {
    for (x = 0; x < size; x++) {
      map[y][x] = 0;
    }
  }

  int cx = size / 2;
  int cy = size / 2;

  // Seed pattern: eight live cells around the centre of the grid.
  map[cy - 1][cx - 3] = 1;
  map[cy - 1][cx + 2] = 1;

  map[cy][cx - 4] = 1;
  map[cy][cx - 3] = 1;
  map[cy][cx + 2] = 1;
  map[cy][cx + 3] = 1;

  map[cy + 1][cx - 3] = 1;
  map[cy + 1][cx + 2] = 1;

  for (1; 1; 1) {
    // Pass 1: compute the next generation into bit 1 of every cell.
    for (y = 0; y < size; y++) {
      for (x = 0; x < size; x++) {
        // Sum bit 0 over the 3x3 neighbourhood, then remove the cell itself.
        count = 0;
        for (p = -1; p <= 1; p++)
          for (q = -1; q <= 1; q++)
            count += map[(y + p) & mask][(x + q) & mask] & 1;
        count -= map[y][x] & 1;
        // A live cell survives with 2 or 3 neighbours; a dead cell is born
        // with exactly 3.
        if ((map[y][x] && (count == 2 || count == 3)) ||
            (!map[y][x] && count == 3))
          map[y][x] = map[y][x] | 2;
      }
    }
    // Pass 2: promote bit 1 to bit 0 and print the grid.
    for (y = 0; y < size; y++) {
      for (x = 0; x < size; x++) {
        map[y][x] = map[y][x] >> 1;
        if (map[y][x])
          putchar('*');
        else
          putchar(' ');
        putchar(' ');
      }
      putchar('\n');
    }
    usleep(1000 * 100 * 5);
    putchar('\n');
  }
}
// https://www.json.org/json-en.html
//
// Reads a JSON value from stdin and prints one "<path> = <value>" line per
// leaf. Only true, false, null and (nested) arrays are handled here.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PATH_SIZE 128
#define PATH_LEN_STACK_SIZE 16

// Path of the value being parsed. Not NUL-terminated: only the first
// path_used bytes are meaningful.
char path[PATH_SIZE];
int path_used;
// Values of path_used saved by PushPath() and restored by PopPath().
int path_len_stack[PATH_LEN_STACK_SIZE];
int path_len_stack_used;

// Appends s to the current path, remembering the previous length so that
// PopPath() can undo it. Prints a message and exits with status 1 when the
// length stack is full or the path buffer would overflow.
void PushPath(const char *s) {
  if (path_len_stack_used >= PATH_LEN_STACK_SIZE) {
    printf("No more path len stack\n");
    exit(1);
  }
  int slen = strlen(s);
  if (path_used + slen >= PATH_SIZE) {
    printf("Too long path\n");
    exit(1);
  }
  path_len_stack[path_len_stack_used++] = path_used;
  memcpy(&path[path_used], s, slen);
  path_used += slen;
}

// Drops the most recently pushed path component. Exits with status 1 when
// there is nothing to pop.
void PopPath() {
  if (path_len_stack_used <= 0) {
    printf("Cannot pop path\n");
    exit(1);
  }
  path_used = path_len_stack[--path_len_stack_used];
}

// Prints the current path without a trailing newline.
void PrintPath() { printf("%.*s", path_used, path); }

// If c starts `true` or `false`, consumes the rest of the literal from stdin,
// prints "<path> = true|false" and returns 1. A malformed literal is reported
// and terminates the program. Returns 0 (consuming nothing) for any other c.
int TryValueBool(int c) {
  if (c == 't') {
    // Each read must be parenthesized: `c = getchar() != 'r'` would store the
    // comparison result in c and the message below would print the wrong char.
    if ((c = getchar()) != 'r' || (c = getchar()) != 'u' ||
        (c = getchar()) != 'e') {
      printf("Unexpected char %c\n", c);
      exit(1);
    }
    PrintPath();
    printf(" = true\n");
    return 1;
  }
  if (c == 'f') {
    if ((c = getchar()) != 'a' || (c = getchar()) != 'l' ||
        (c = getchar()) != 's' || (c = getchar()) != 'e') {
      printf("Unexpected char %c\n", c);
      exit(1);
    }
    PrintPath();
    printf(" = false\n");
    return 1;
  }
  return 0;
}

// Same contract as TryValueBool(), for the literal `null`.
int TryValueNull(int c) {
  if (c == 'n') {
    if ((c = getchar()) != 'u' || (c = getchar()) != 'l' ||
        (c = getchar()) != 'l') {
      printf("Unexpected char %c\n", c);
      exit(1);
    }
    PrintPath();
    printf(" = null\n");
    return 1;
  }
  return 0;
}

int ReadWhiteSpaces(int c) {
  // returns first non-whitespace character
  // (only ' ' is treated as whitespace here)
  while (c == ' ') {
    c = getchar();
  }
  return c;
}

int ReadElement(int c);

// If c is '[', parses array elements up to the matching ']' and returns 1.
// While an element is parsed, "[<index>]" is pushed onto the path; the index
// advances each time a ',' is seen. Returns 0 for any other c.
int TryValueArray(int c) {
  if (c != '[') return 0;
  c = ReadWhiteSpaces(getchar());
  int index = 0;
  char buf[16];
  for (;;) {
    if (c == ']') break;
    if (c == ',') {
      c = ReadWhiteSpaces(getchar());
      index++;
      continue;
    }
    snprintf(buf, sizeof(buf), "[%d]", index);
    PushPath(buf);
    c = ReadElement(c);
    PopPath();
  }
  return 1;
}

// Parses one value whose first character is c. Reports the character and
// exits with status 1 when no supported value starts with c.
void ParseValue(int c) {
  if (TryValueBool(c) || TryValueNull(c) || TryValueArray(c)) {
    return;
  }
  printf("Unexpected char %c\n", c);
  exit(1);
}

int ReadElement(int c) {
  // returns first non-element character
  ParseValue(ReadWhiteSpaces(c));
  return ReadWhiteSpaces(getchar());
}

// Parses a single value from stdin and reports any character other than a
// newline or EOF that directly follows it.
int main() {
  int c;
  ParseValue(getchar());
  c = getchar();
  if (c != '\n' && c != EOF) {
    printf("Unexpected char %c (%d)\n", c, c);
  }
  return 0;
}
out.stdout || { \ 10 | echo "$input" > failcase.txt; \ 11 | echo "Run failed."; \ 12 | exit 1; } 13 | diff -y expected.stdout out.stdout \ 14 | && printf "\nPASS $testname\n" \ 15 | || { printf "\nFAIL $testname: stdout diff\n"; exit 1; } 16 | } 17 | 18 | test_stdout \ 19 | "`cat << EOS 20 | true 21 | EOS 22 | `" \ 23 | "`cat << EOS 24 | = true 25 | EOS 26 | `" \ 27 | 'true value' 28 | 29 | test_stdout \ 30 | "`cat << EOS 31 | false 32 | EOS 33 | `" \ 34 | "`cat << EOS 35 | = false 36 | EOS 37 | `" \ 38 | 'false value' 39 | 40 | test_stdout \ 41 | "`cat << EOS 42 | null 43 | EOS 44 | `" \ 45 | "`cat << EOS 46 | = null 47 | EOS 48 | `" \ 49 | 'null value' 50 | 51 | test_stdout \ 52 | "`cat << EOS 53 | [null] 54 | EOS 55 | `" \ 56 | "`cat << EOS 57 | [0] = null 58 | EOS 59 | `" \ 60 | 'Array1' 61 | 62 | test_stdout \ 63 | "`cat << EOS 64 | [null, true] 65 | EOS 66 | `" \ 67 | "`cat << EOS 68 | [0] = null 69 | [1] = true 70 | EOS 71 | `" \ 72 | 'Array2' 73 | 74 | test_stdout \ 75 | "`cat << EOS 76 | [[null], [true]] 77 | EOS 78 | `" \ 79 | "`cat << EOS 80 | [0][0] = null 81 | [1][0] = true 82 | EOS 83 | `" \ 84 | 'Nested array' 85 | -------------------------------------------------------------------------------- /examples/measure_collatz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | echo "building..." 3 | make collatz.bin >/dev/null 2>&1 4 | make collatz.host.bin collatz.host_o3.bin >/dev/null 2>&1 5 | echo "running collatz.bin (by compilium)..." 6 | ( time ./collatz.bin | tee collatz.stdout.txt ) 2>&1 | grep real 7 | echo "running collatz.host.bin (by host compiler)..." 8 | ( time ./collatz.host.bin | tee collatz.host.stdout.txt ) 2>&1 | grep real 9 | echo "running collatz.host_o3.bin (by host compiler with -O3)..." 
10 | ( time ./collatz.host_o3.bin | tee collatz.host_o3.stdout.txt ) 2>&1 | grep real 11 | 12 | diff -u collatz.stdout.txt collatz.host.stdout.txt || { echo "stdout diff"; false; } 13 | diff -u collatz.stdout.txt collatz.host_o3.stdout.txt || { echo "stdout diff"; false; } 14 | echo "OK" 15 | 16 | -------------------------------------------------------------------------------- /examples/measure_constsum.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | echo "building..." 3 | make constsum.bin >/dev/null 2>&1 4 | make constsum.host.bin constsum.host_o3.bin >/dev/null 2>&1 5 | echo "running constsum.bin (by compilium)..." 6 | ( time ./constsum.bin ) 2>&1 | grep real 7 | echo "running constsum.host.bin (by host compiler)..." 8 | ( time ./constsum.host.bin ) 2>&1 | grep real 9 | echo "running constsum.host_o3.bin (by host compiler with -O3)..." 10 | ( time ./constsum.host_o3.bin ) 2>&1 | grep real 11 | -------------------------------------------------------------------------------- /examples/measure_fib.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | echo "building..." 3 | make fib.bin >/dev/null 2>&1 4 | make fib.host.bin fib.host_o3.bin >/dev/null 2>&1 5 | echo "running fib.bin (by compilium)..." 6 | ( time ./fib.bin ) 2>&1 | grep real 7 | echo "running fib.host.bin (by host compiler)..." 8 | ( time ./fib.host.bin ) 2>&1 | grep real 9 | echo "running fib.host_o3.bin (by host compiler with -O3)..." 10 | ( time ./fib.host_o3.bin ) 2>&1 | grep real 11 | -------------------------------------------------------------------------------- /examples/measure_pi.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | echo "building..." 3 | make pi.bin >/dev/null 2>&1 4 | make pi.host.bin pi.host_o3.bin >/dev/null 2>&1 5 | echo "running pi.bin (by compilium)..." 
6 | ( time ./pi.bin ) 2>&1 | grep real 7 | echo "running pi.host.bin (by host compiler)..." 8 | ( time ./pi.host.bin ) 2>&1 | grep real 9 | echo "running pi.host_o3.bin (by host compiler with -O3)..." 10 | ( time ./pi.host_o3.bin ) 2>&1 | grep real 11 | -------------------------------------------------------------------------------- /examples/pi.c: -------------------------------------------------------------------------------- 1 | int write(int fd, const void *, int); 2 | void exit(int); 3 | 4 | void printf04d(int v) { 5 | char s[4]; 6 | for (int i = 0; i < 4; i++) { 7 | s[3 - i] = v % 10 + '0'; 8 | v /= 10; 9 | } 10 | write(1, s, 4); 11 | } 12 | 13 | int nume[52514]; 14 | int i; 15 | int n; 16 | int carry; 17 | int digit; 18 | int base; 19 | int denom; 20 | int first; 21 | 22 | int main(int argc, char **argv) { 23 | base = 10000; 24 | for (n = 52500; n > 0; n -= 14) { 25 | carry %= base; 26 | digit = carry; 27 | for (i = n - 1; i > 0; --i) { 28 | denom = 2 * i - 1; 29 | carry = carry * i + base * (first ? 
nume[i] : (base / 5)); 30 | nume[i] = carry % denom; 31 | carry /= denom; 32 | } 33 | printf04d(digit + carry / base); 34 | first = 1; 35 | } 36 | write(1, "\n", 1); 37 | exit(0); 38 | return 0; 39 | } 40 | -------------------------------------------------------------------------------- /generator.c: -------------------------------------------------------------------------------- 1 | #include "compilium.h" 2 | 3 | static void GenerateForNodeRValue(struct Node *node); 4 | 5 | static struct Node *str_list; 6 | static int label_to_break; 7 | static int label_to_continue; 8 | 9 | static int GetLabelNumber() { 10 | static int label_number; 11 | return ++label_number; 12 | } 13 | 14 | static void EmitConvertToBool(int dst, int src) { 15 | // This code also sets zero flag as boolean value 16 | printf("cmp %s, 0\n", reg_names_64[src]); 17 | printf("setnz %s\n", reg_names_8[src]); 18 | printf("movzx %s, %s\n", reg_names_64[dst], reg_names_8[src]); 19 | } 20 | 21 | static void EmitCompareIntegers(int dst, int left, int right, const char *cc) { 22 | printf("cmp %s, %s\n", reg_names_64[left], reg_names_64[right]); 23 | printf("set%s %s\n", cc, reg_names_8[dst]); 24 | printf("movzx %s, %s\n", reg_names_64[dst], reg_names_8[dst]); 25 | } 26 | 27 | static void EmitMoveToMemory(struct Node *op, int dst, int src, int size) { 28 | if (size == 8) { 29 | printf("mov [%s], %s\n", reg_names_64[dst], reg_names_64[src]); 30 | return; 31 | } 32 | if (size == 4) { 33 | printf("mov [%s], %s # 4 byte store\n", reg_names_64[dst], 34 | reg_names_32[src]); 35 | return; 36 | } 37 | if (size == 1) { 38 | printf("mov [%s], %s\n", reg_names_64[dst], reg_names_8[src]); 39 | return; 40 | } 41 | ErrorWithToken(op, "Assigning %d bytes is not implemented.", size); 42 | } 43 | 44 | static void EmitMoveFromMemory(struct Node *op, int dst, int src, int size) { 45 | if (size == 8) { 46 | printf("mov %s, [%s]\n", reg_names_64[dst], reg_names_64[src]); 47 | return; 48 | } 49 | if (size == 4) { 50 | 
printf("movsxd %s, dword ptr [%s]\n", reg_names_64[dst], reg_names_64[src]); 51 | return; 52 | } 53 | if (size == 1) { 54 | printf("movsxb %s, byte ptr [%s]\n", reg_names_64[dst], reg_names_64[src]); 55 | return; 56 | } 57 | ErrorWithToken(op, "Assigning %d bytes is not implemented.", size); 58 | } 59 | 60 | static void EmitAddToMemory(struct Node *op, int dst, int src, int size) { 61 | if (size == 8) { 62 | printf("add qword ptr [%s], %s\n", reg_names_64[dst], reg_names_64[src]); 63 | return; 64 | } 65 | if (size == 4) { 66 | printf("add dword ptr [%s], %s\n", reg_names_64[dst], reg_names_32[src]); 67 | return; 68 | } 69 | if (size == 1) { 70 | printf("add byte ptr [%s], %s\n", reg_names_64[dst], reg_names_8[src]); 71 | return; 72 | } 73 | ErrorWithToken(op, "Assigning %d bytes is not implemented.", size); 74 | } 75 | 76 | static void EmitSubFromMemory(struct Node *op, int dst, int src, int size) { 77 | if (size == 8) { 78 | printf("sub qword ptr [%s], %s\n", reg_names_64[dst], reg_names_64[src]); 79 | return; 80 | } 81 | if (size == 4) { 82 | printf("sub dword ptr [%s], %s\n", reg_names_64[dst], reg_names_32[src]); 83 | return; 84 | } 85 | if (size == 1) { 86 | printf("sub byte ptr [%s], %s\n", reg_names_64[dst], reg_names_8[src]); 87 | return; 88 | } 89 | ErrorWithToken(op, "Assigning %d bytes is not implemented.", size); 90 | } 91 | 92 | static void EmitDecMemory(struct Node *op, int dst, int size) { 93 | if (size == 8) { 94 | printf("dec qword ptr [%s]\n", reg_names_64[dst]); 95 | return; 96 | } 97 | if (size == 4) { 98 | printf("dec dword ptr [%s]\n", reg_names_64[dst]); 99 | return; 100 | } 101 | if (size == 1) { 102 | printf("dec byte ptr [%s]\n", reg_names_64[dst]); 103 | return; 104 | } 105 | ErrorWithToken(op, "Assigning %d bytes is not implemented.", size); 106 | } 107 | 108 | static void EmitIncMemory(struct Node *op, int dst, int size) { 109 | if (size == 8) { 110 | printf("inc qword ptr [%s]\n", reg_names_64[dst]); 111 | return; 112 | } 113 | if 
(size == 4) { 114 | printf("inc dword ptr [%s]\n", reg_names_64[dst]); 115 | return; 116 | } 117 | if (size == 1) { 118 | printf("inc byte ptr [%s]\n", reg_names_64[dst]); 119 | return; 120 | } 121 | ErrorWithToken(op, "Assigning %d bytes is not implemented.", size); 122 | } 123 | 124 | static void EmitMulToMemory(struct Node *op, int dst, int src, int size) { 125 | if (size == 4) { 126 | // rdx:rax <- rax * r/m 127 | printf("xor rdx, rdx\n"); 128 | printf("mov rax, %s\n", reg_names_64[dst]); 129 | printf("mov eax, [rax]\n"); 130 | printf("imul %s\n", reg_names_64[src]); 131 | printf("mov [%s], eax\n", reg_names_64[dst]); 132 | return; 133 | } 134 | ErrorWithToken(op, "Assigning %d bytes is not implemented.", size); 135 | } 136 | 137 | static void EmitDivToMemory(struct Node *op, int dst, int src, int size) { 138 | if (size == 4) { 139 | // rax <- rdx:rax / r/m 140 | printf("xor rdx, rdx\n"); 141 | printf("mov eax, [%s]\n", reg_names_64[dst]); 142 | printf("idiv %s\n", reg_names_64[src]); 143 | printf("mov [%s], eax\n", reg_names_64[dst]); 144 | return; 145 | } 146 | ErrorWithToken(op, "Assigning %d bytes is not implemented.", size); 147 | } 148 | 149 | static void EmitModToMemory(struct Node *op, int dst, int src, int size) { 150 | if (size == 4) { 151 | // rdx <- rdx:rax % r/m 152 | printf("xor rdx, rdx\n"); 153 | printf("mov eax, [%s]\n", reg_names_64[dst]); 154 | printf("idiv %s\n", reg_names_64[src]); 155 | printf("mov [%s], edx\n", reg_names_64[dst]); 156 | return; 157 | } 158 | ErrorWithToken(op, "Assigning %d bytes is not implemented.", size); 159 | } 160 | 161 | static void EmitLShiftMemory(struct Node *op, int dst, int src, int size) { 162 | if (size == 4) { 163 | printf("mov ecx, %s\n", reg_names_32[src]); 164 | printf("shl dword ptr [%s], cl\n", reg_names_64[dst]); 165 | return; 166 | } 167 | ErrorWithToken(op, "Assigning %d bytes is not implemented.", size); 168 | } 169 | 170 | static void EmitRShiftMemory(struct Node *op, int dst, int src, int size) { 
171 | if (size == 4) { 172 | printf("mov ecx, %s\n", reg_names_32[src]); 173 | printf("shr dword ptr [%s], cl\n", reg_names_64[dst]); 174 | return; 175 | } 176 | ErrorWithToken(op, "Assigning %d bytes is not implemented.", size); 177 | } 178 | 179 | const char *GetParamRegName(struct Node *type, int idx) { 180 | assert(0 <= idx && idx < NUM_OF_PARAM_REGISTERS); 181 | int size = GetSizeOfType(type); 182 | if (size == 8) return param_reg_names_64[idx]; 183 | if (size == 4) return param_reg_names_32[idx]; 184 | if (size == 1) return param_reg_names_8[idx]; 185 | ErrorWithToken(GetIdentifierTokenFromTypeAttr(type), 186 | "Assigning %d bytes is not implemented.", size); 187 | } 188 | 189 | static void GenerateForNode(struct Node *node) { 190 | if (node->type == kASTList && !node->op) { 191 | for (int i = 0; i < GetSizeOfList(node); i++) { 192 | GenerateForNode(GetNodeAt(node, i)); 193 | } 194 | return; 195 | } 196 | if (node->type == kASTExprFuncCall) { 197 | printf("sub rsp, %d # alloc stack frame\n", node->stack_size_needed); 198 | int i; 199 | for (i = 1; i <= NUM_OF_SCRATCH_REGS; i++) { 200 | printf("push %s # save scratch regs\n", reg_names_64[i]); 201 | } 202 | GenerateForNodeRValue(node->func_expr); 203 | printf("push %s\n", reg_names_64[node->func_expr->reg]); 204 | assert(GetSizeOfList(node->arg_expr_list) <= NUM_OF_PARAM_REGISTERS); 205 | for (i = 0; i < GetSizeOfList(node->arg_expr_list); i++) { 206 | struct Node *n = GetNodeAt(node->arg_expr_list, i); 207 | GenerateForNodeRValue(n); 208 | printf("push %s\n", reg_names_64[n->reg]); 209 | } 210 | for (i--; i >= 0; i--) { 211 | printf("pop %s\n", param_reg_names_64[i]); 212 | } 213 | printf("pop rax\n"); 214 | printf("call rax\n"); 215 | for (i = NUM_OF_SCRATCH_REGS; i >= 1; i--) { 216 | printf("pop %s # restore scratch regs\n", reg_names_64[i]); 217 | } 218 | int ret_type_size = GetSizeOfType(node->expr_type); 219 | if (ret_type_size == 4) { 220 | printf("movsxd %s, eax\n", reg_names_64[node->reg]); 221 | } 
else if (ret_type_size == 8) { 222 | printf("mov %s, rax\n", reg_names_64[node->reg]); 223 | } else if (ret_type_size == 0) { 224 | // Return type is "void". Do nothing. 225 | } else { 226 | assert(false); 227 | } 228 | printf("add rsp, %d # restore stack frame\n", node->stack_size_needed); 229 | return; 230 | } else if (node->type == kASTFuncDef) { 231 | const char *func_name = CreateTokenStr(node->func_name_token); 232 | printf(".global %s%s\n", symbol_prefix, func_name); 233 | printf("%s%s:\n", symbol_prefix, func_name); 234 | printf("push rbp\n"); 235 | printf("mov rbp, rsp\n"); 236 | printf("push r12\n"); 237 | printf("push r13\n"); 238 | printf("push r14\n"); 239 | printf("push r15\n"); 240 | struct Node *arg_var_list = node->arg_var_list; 241 | assert(arg_var_list); 242 | assert(GetSizeOfList(arg_var_list) <= NUM_OF_PARAM_REGISTERS); 243 | for (int i = 0; i < GetSizeOfList(arg_var_list); i++) { 244 | struct Node *arg_var = GetNodeAt(arg_var_list, i); 245 | if (!arg_var) continue; 246 | const char *param_reg_name = GetParamRegName(arg_var->expr_type, i); 247 | printf("mov [rbp - %d], %s // arg[%d]\n", arg_var->byte_offset, 248 | param_reg_name, i); 249 | } 250 | GenerateForNode(node->func_body); 251 | printf("pop r15\n"); 252 | printf("pop r14\n"); 253 | printf("pop r13\n"); 254 | printf("pop r12\n"); 255 | printf("mov rsp, rbp\n"); 256 | printf("pop rbp\n"); 257 | printf("ret\n"); 258 | return; 259 | } 260 | assert(node && node->op); 261 | if (node->type == kASTExpr) { 262 | if (IsTokenWithType(node->op, kTokenIntegerConstant)) { 263 | printf("mov %s, %ld\n", reg_names_64[node->reg], 264 | strtol(node->op->begin, NULL, 0)); 265 | return; 266 | } else if (IsTokenWithType(node->op, kTokenCharLiteral)) { 267 | if (node->op->length == (1 + 1 + 1)) { 268 | printf("mov %s, %d\n", reg_names_64[node->reg], node->op->begin[1]); 269 | return; 270 | } 271 | if (node->op->length == (1 + 2 + 1) && node->op->begin[1] == '\\') { 272 | if (node->op->begin[2] == 'n') { 273 | 
printf("mov %s, %d\n", reg_names_64[node->reg], '\n'); 274 | return; 275 | } 276 | if (node->op->begin[2] == '\\') { 277 | printf("mov %s, %d\n", reg_names_64[node->reg], '\\'); 278 | return; 279 | } 280 | } 281 | ErrorWithToken(node->op, "Not implemented char literal"); 282 | } else if (IsEqualTokenWithCStr(node->op, "(")) { 283 | GenerateForNode(node->right); 284 | return; 285 | } else if (IsEqualTokenWithCStr(node->op, ".")) { 286 | GenerateForNodeRValue(node->left); 287 | printf("add %s, %d # struct member ofs\n", reg_names_64[node->reg], 288 | node->byte_offset); 289 | return; 290 | } else if (IsEqualTokenWithCStr(node->op, "->")) { 291 | GenerateForNodeRValue(node->left); 292 | printf("add %s, %d # struct member ofs\n", reg_names_64[node->reg], 293 | node->byte_offset); 294 | return; 295 | } else if (IsEqualTokenWithCStr(node->op, "[")) { 296 | GenerateForNodeRValue(node->left); 297 | GenerateForNodeRValue(node->right); 298 | int elem_size = GetSizeOfType(node->expr_type); 299 | printf("imul %s, %s, %d\n", reg_names_64[node->right->reg], 300 | reg_names_64[node->right->reg], elem_size); 301 | printf("add %s, %s\n", reg_names_64[node->left->reg], 302 | reg_names_64[node->right->reg]); 303 | return; 304 | } else if (IsTokenWithType(node->op, kTokenIdent)) { 305 | if (node->expr_type->type == kTypeFunction) { 306 | const char *label_name = CreateTokenStr(node->op); 307 | printf(".global %s%s\n", symbol_prefix, label_name); 308 | printf("mov %s, [rip + %s%s@GOTPCREL]\n", reg_names_64[node->reg], 309 | symbol_prefix, label_name); 310 | return; 311 | } 312 | if (!node->byte_offset) { 313 | // global var 314 | const char *label_name = CreateTokenStr(node->op); 315 | printf(".global %s%s\n", symbol_prefix, label_name); 316 | printf("mov %s, [rip + %s%s@GOTPCREL]\n", reg_names_64[node->reg], 317 | symbol_prefix, label_name); 318 | return; 319 | } 320 | printf("lea %s, [rbp - %d]\n", reg_names_64[node->reg], 321 | node->byte_offset); 322 | return; 323 | } else if 
(IsTokenWithType(node->op, kTokenStringLiteral)) { 324 | int str_label = GetLabelNumber(); 325 | printf("lea %s, [rip + L%d]\n", reg_names_64[node->reg], str_label); 326 | node->label_number = str_label; 327 | PushToList(str_list, node); 328 | return; 329 | } else if (node->cond) { 330 | GenerateForNodeRValue(node->cond); 331 | int false_label = GetLabelNumber(); 332 | int end_label = GetLabelNumber(); 333 | EmitConvertToBool(node->cond->reg, node->cond->reg); 334 | printf("jz L%d\n", false_label); 335 | GenerateForNodeRValue(node->left); 336 | printf("mov %s, %s\n", reg_names_64[node->reg], 337 | reg_names_64[node->left->reg]); 338 | printf("jmp L%d\n", end_label); 339 | printf("L%d:\n", false_label); 340 | GenerateForNodeRValue(node->right); 341 | printf("mov %s, %s\n", reg_names_64[node->reg], 342 | reg_names_64[node->right->reg]); 343 | printf("L%d:\n", end_label); 344 | return; 345 | } else if (!node->left && node->right) { 346 | if (IsEqualTokenWithCStr(node->op, "--")) { 347 | // Prefix -- 348 | int size = GetSizeOfType(node->expr_type); 349 | GenerateForNode(node->right); 350 | EmitDecMemory(node->op, node->reg, GetSizeOfType(node->expr_type)); 351 | EmitMoveFromMemory(node->op, node->reg, node->reg, size); 352 | return; 353 | } 354 | if (IsEqualTokenWithCStr(node->op, "++")) { 355 | // Prefix ++ 356 | int size = GetSizeOfType(node->expr_type); 357 | GenerateForNode(node->right); 358 | EmitIncMemory(node->op, node->reg, GetSizeOfType(node->expr_type)); 359 | EmitMoveFromMemory(node->op, node->reg, node->reg, size); 360 | return; 361 | } 362 | if (IsTokenWithType(node->op, kTokenKwSizeof)) { 363 | printf("mov %s, %d\n", reg_names_64[node->reg], 364 | GetSizeOfType(node->right->expr_type)); 365 | return; 366 | } 367 | if (IsEqualTokenWithCStr(node->op, "&")) { 368 | GenerateForNode(node->right); 369 | return; 370 | } 371 | GenerateForNodeRValue(node->right); 372 | if (IsEqualTokenWithCStr(node->op, "+")) { 373 | return; 374 | } 375 | if 
(IsEqualTokenWithCStr(node->op, "-")) { 376 | printf("neg %s\n", reg_names_64[node->reg]); 377 | return; 378 | } 379 | if (IsEqualTokenWithCStr(node->op, "~")) { 380 | printf("not %s\n", reg_names_64[node->reg]); 381 | return; 382 | } 383 | if (IsEqualTokenWithCStr(node->op, "!")) { 384 | EmitConvertToBool(node->reg, node->reg); 385 | printf("setz %s\n", reg_names_8[node->reg]); 386 | return; 387 | } 388 | if (IsEqualTokenWithCStr(node->op, "*")) { 389 | return; 390 | } 391 | ErrorWithToken(node->op, 392 | "GenerateForNode: Not implemented unary prefix op"); 393 | } else if (node->left && !node->right) { 394 | if (IsEqualTokenWithCStr(node->op, "++")) { 395 | // Postfix ++ 396 | int size = GetSizeOfType(node->expr_type); 397 | GenerateForNode(node->left); 398 | EmitIncMemory(node->op, node->reg, size); 399 | EmitMoveFromMemory(node->op, node->reg, node->reg, size); 400 | printf("sub %s, 1\n", reg_names_64[node->reg]); 401 | return; 402 | } 403 | if (IsEqualTokenWithCStr(node->op, "--")) { 404 | // Postfix -- 405 | int size = GetSizeOfType(node->expr_type); 406 | GenerateForNode(node->left); 407 | EmitDecMemory(node->op, node->reg, GetSizeOfType(node->expr_type)); 408 | EmitMoveFromMemory(node->op, node->reg, node->reg, size); 409 | printf("add %s, 1\n", reg_names_64[node->reg]); 410 | return; 411 | } 412 | ErrorWithToken(node->op, 413 | "GenerateForNode: Not implemented unary postfix op"); 414 | } else if (node->left && node->right) { 415 | if (IsEqualTokenWithCStr(node->op, "&&")) { 416 | GenerateForNodeRValue(node->left); 417 | int skip_label = GetLabelNumber(); 418 | EmitConvertToBool(node->reg, node->left->reg); 419 | printf("jz L%d\n", skip_label); 420 | GenerateForNodeRValue(node->right); 421 | EmitConvertToBool(node->reg, node->right->reg); 422 | printf("L%d:\n", skip_label); 423 | return; 424 | } else if (IsEqualTokenWithCStr(node->op, "||")) { 425 | GenerateForNodeRValue(node->left); 426 | int skip_label = GetLabelNumber(); 427 | 
EmitConvertToBool(node->reg, node->left->reg); 428 | printf("jnz L%d\n", skip_label); 429 | GenerateForNodeRValue(node->right); 430 | EmitConvertToBool(node->reg, node->right->reg); 431 | printf("L%d:\n", skip_label); 432 | return; 433 | } else if (IsEqualTokenWithCStr(node->op, ",")) { 434 | GenerateForNode(node->left); 435 | GenerateForNodeRValue(node->right); 436 | return; 437 | } else if (IsEqualTokenWithCStr(node->op, "=") || 438 | IsEqualTokenWithCStr(node->op, "+=") || 439 | IsEqualTokenWithCStr(node->op, "-=") || 440 | IsEqualTokenWithCStr(node->op, "*=") || 441 | IsEqualTokenWithCStr(node->op, "/=") || 442 | IsEqualTokenWithCStr(node->op, "%=") || 443 | IsEqualTokenWithCStr(node->op, "<<=") || 444 | IsEqualTokenWithCStr(node->op, ">>=")) { 445 | GenerateForNode(node->left); 446 | GenerateForNodeRValue(node->right); 447 | int size = GetSizeOfType(node->left->expr_type); 448 | if (IsEqualTokenWithCStr(node->op, "=")) { 449 | EmitMoveToMemory(node->op, node->left->reg, node->right->reg, size); 450 | return; 451 | } 452 | if (IsEqualTokenWithCStr(node->op, "+=")) { 453 | EmitAddToMemory(node->op, node->left->reg, node->right->reg, size); 454 | return; 455 | } 456 | if (IsEqualTokenWithCStr(node->op, "-=")) { 457 | EmitSubFromMemory(node->op, node->left->reg, node->right->reg, size); 458 | return; 459 | } 460 | if (IsEqualTokenWithCStr(node->op, "*=")) { 461 | EmitMulToMemory(node->op, node->left->reg, node->right->reg, size); 462 | return; 463 | } 464 | if (IsEqualTokenWithCStr(node->op, "/=")) { 465 | EmitDivToMemory(node->op, node->left->reg, node->right->reg, size); 466 | return; 467 | } 468 | if (IsEqualTokenWithCStr(node->op, "%=")) { 469 | EmitModToMemory(node->op, node->left->reg, node->right->reg, size); 470 | return; 471 | } 472 | if (IsEqualTokenWithCStr(node->op, "<<=")) { 473 | EmitLShiftMemory(node->op, node->left->reg, node->right->reg, size); 474 | return; 475 | } 476 | if (IsEqualTokenWithCStr(node->op, ">>=")) { 477 | 
EmitRShiftMemory(node->op, node->left->reg, node->right->reg, size); 478 | return; 479 | } 480 | assert(false); 481 | } 482 | GenerateForNodeRValue(node->left); 483 | GenerateForNodeRValue(node->right); 484 | if (IsEqualTokenWithCStr(node->op, "+")) { 485 | printf("add %s, %s\n", reg_names_64[node->reg], 486 | reg_names_64[node->right->reg]); 487 | return; 488 | } else if (IsEqualTokenWithCStr(node->op, "-")) { 489 | printf("sub %s, %s\n", reg_names_64[node->reg], 490 | reg_names_64[node->right->reg]); 491 | return; 492 | } else if (IsEqualTokenWithCStr(node->op, "*")) { 493 | // rdx:rax <- rax * r/m 494 | printf("xor rdx, rdx\n"); 495 | printf("mov rax, %s\n", reg_names_64[node->reg]); 496 | printf("imul %s\n", reg_names_64[node->right->reg]); 497 | printf("mov %s, rax\n", reg_names_64[node->reg]); 498 | return; 499 | } else if (IsEqualTokenWithCStr(node->op, "/")) { 500 | // rax <- rdx:rax / r/m 501 | printf("xor rdx, rdx\n"); 502 | printf("mov rax, %s\n", reg_names_64[node->reg]); 503 | printf("idiv %s\n", reg_names_64[node->right->reg]); 504 | printf("mov %s, rax\n", reg_names_64[node->reg]); 505 | return; 506 | } else if (IsEqualTokenWithCStr(node->op, "%")) { 507 | // rdx <- rdx:rax % r/m 508 | printf("xor rdx, rdx\n"); 509 | printf("mov rax, %s\n", reg_names_64[node->reg]); 510 | printf("idiv %s\n", reg_names_64[node->right->reg]); 511 | printf("mov %s, rdx\n", reg_names_64[node->reg]); 512 | return; 513 | } else if (IsEqualTokenWithCStr(node->op, "<<")) { 514 | // r/m <<= CL 515 | printf("mov rcx, %s\n", reg_names_64[node->right->reg]); 516 | printf("sal %s, cl\n", reg_names_64[node->reg]); 517 | return; 518 | } else if (IsEqualTokenWithCStr(node->op, ">>")) { 519 | // r/m >>= CL 520 | printf("mov rcx, %s\n", reg_names_64[node->right->reg]); 521 | printf("sar %s, cl\n", reg_names_64[node->reg]); 522 | return; 523 | } else if (IsEqualTokenWithCStr(node->op, "<")) { 524 | EmitCompareIntegers(node->reg, node->left->reg, node->right->reg, "l"); 525 | return; 
526 | } else if (IsEqualTokenWithCStr(node->op, ">")) { 527 | EmitCompareIntegers(node->reg, node->left->reg, node->right->reg, "g"); 528 | return; 529 | } else if (IsEqualTokenWithCStr(node->op, "<=")) { 530 | EmitCompareIntegers(node->reg, node->left->reg, node->right->reg, "le"); 531 | return; 532 | } else if (IsEqualTokenWithCStr(node->op, ">=")) { 533 | EmitCompareIntegers(node->reg, node->left->reg, node->right->reg, "ge"); 534 | return; 535 | } else if (IsEqualTokenWithCStr(node->op, "==")) { 536 | EmitCompareIntegers(node->reg, node->left->reg, node->right->reg, "e"); 537 | return; 538 | } else if (IsEqualTokenWithCStr(node->op, "!=")) { 539 | EmitCompareIntegers(node->reg, node->left->reg, node->right->reg, "ne"); 540 | return; 541 | } else if (IsEqualTokenWithCStr(node->op, "&")) { 542 | printf("and %s, %s\n", reg_names_64[node->reg], 543 | reg_names_64[node->right->reg]); 544 | return; 545 | } else if (IsEqualTokenWithCStr(node->op, "^")) { 546 | printf("xor %s, %s\n", reg_names_64[node->reg], 547 | reg_names_64[node->right->reg]); 548 | return; 549 | } else if (IsEqualTokenWithCStr(node->op, "|")) { 550 | printf("or %s, %s\n", reg_names_64[node->reg], 551 | reg_names_64[node->right->reg]); 552 | return; 553 | } 554 | } 555 | } 556 | if (node->type == kASTExprStmt) { 557 | if (node->left) GenerateForNode(node->left); 558 | return; 559 | } else if (node->type == kASTList) { 560 | for (int i = 0; i < GetSizeOfList(node); i++) { 561 | GenerateForNode(GetNodeAt(node, i)); 562 | } 563 | return; 564 | } else if (node->type == kASTDecl) { 565 | if (IsASTDeclOfTypedef(node)) { 566 | return; 567 | } 568 | assert(node->right && node->right->type == kASTDecltor); 569 | if (!node->right->decltor_init_expr) return; 570 | GenerateForNode(node->right->decltor_init_expr); 571 | return; 572 | } else if (node->type == kASTJumpStmt) { 573 | if (IsTokenWithType(node->op, kTokenKwBreak)) { 574 | if (!label_to_break) { 575 | ErrorWithToken(node->op, "break is not allowed 
here"); 576 | } 577 | printf("jmp L%d\n", label_to_break); 578 | return; 579 | } 580 | if (IsTokenWithType(node->op, kTokenKwContinue)) { 581 | if (!label_to_continue) { 582 | ErrorWithToken(node->op, "continue is not allowed here"); 583 | } 584 | printf("jmp L%d\n", label_to_continue); 585 | return; 586 | } 587 | if (IsTokenWithType(node->op, kTokenKwReturn)) { 588 | if (node->right) { 589 | GenerateForNodeRValue(node->right); 590 | printf("mov rax, %s\n", reg_names_64[node->right->reg]); 591 | } 592 | printf("mov rsp, rbp\n"); 593 | printf("pop rbp\n"); 594 | printf("ret\n"); 595 | return; 596 | } 597 | ErrorWithToken(node->op, "GenerateForNode: Not implemented jump stmt"); 598 | } else if (node->type == kASTSelectionStmt) { 599 | if (IsTokenWithType(node->op, kTokenKwIf)) { 600 | GenerateForNodeRValue(node->cond); 601 | int false_label = GetLabelNumber(); 602 | int end_label = GetLabelNumber(); 603 | EmitConvertToBool(node->cond->reg, node->cond->reg); 604 | printf("jz L%d\n", false_label); 605 | GenerateForNodeRValue(node->if_true_stmt); 606 | printf("jmp L%d\n", end_label); 607 | printf("L%d:\n", false_label); 608 | if (node->if_else_stmt) { 609 | GenerateForNodeRValue(node->if_else_stmt); 610 | } 611 | printf("L%d:\n", end_label); 612 | return; 613 | } 614 | ErrorWithToken(node->op, "GenerateForNode: Not implemented jump stmt"); 615 | } else if (node->type == kASTForStmt) { 616 | int loop_label = GetLabelNumber(); 617 | int end_label = GetLabelNumber(); 618 | int old_label_to_break = label_to_break; 619 | label_to_break = end_label; 620 | int old_label_to_continue = label_to_break; 621 | label_to_continue = loop_label; 622 | if (node->init) { 623 | GenerateForNode(node->init); 624 | } 625 | printf("L%d:\n", loop_label); 626 | if (node->cond) { 627 | GenerateForNodeRValue(node->cond); 628 | EmitConvertToBool(node->cond->reg, node->cond->reg); 629 | printf("jz L%d\n", end_label); 630 | } 631 | GenerateForNode(node->body); 632 | if (node->updt) { 633 | 
GenerateForNode(node->updt); 634 | } 635 | printf("jmp L%d\n", loop_label); 636 | printf("L%d:\n", end_label); 637 | label_to_continue = old_label_to_continue; 638 | label_to_break = old_label_to_break; 639 | return; 640 | } else if (node->type == kASTWhileStmt) { 641 | int loop_label = GetLabelNumber(); 642 | int end_label = GetLabelNumber(); 643 | int old_label_to_break = label_to_break; 644 | label_to_break = end_label; 645 | int old_label_to_continue = label_to_break; 646 | label_to_continue = loop_label; 647 | printf("L%d:\n", loop_label); 648 | GenerateForNodeRValue(node->cond); 649 | EmitConvertToBool(node->cond->reg, node->cond->reg); 650 | printf("jz L%d\n", end_label); 651 | GenerateForNode(node->body); 652 | printf("jmp L%d\n", loop_label); 653 | printf("L%d:\n", end_label); 654 | label_to_continue = old_label_to_continue; 655 | label_to_break = old_label_to_break; 656 | return; 657 | } 658 | ErrorWithToken(node->op, "GenerateForNode: Not implemented"); 659 | } 660 | 661 | static void GenerateForNodeRValue(struct Node *node) { 662 | GenerateForNode(node); 663 | if (!node->expr_type) return; 664 | if (node->expr_type->type != kTypeLValue) return; 665 | if (node->expr_type->type == kTypeLValue && 666 | node->expr_type->right->type == kTypeArray) 667 | return; 668 | int size = GetSizeOfType(GetRValueType(node->expr_type)); 669 | if (size == 8) { 670 | printf("mov %s, [%s]\n", reg_names_64[node->reg], reg_names_64[node->reg]); 671 | return; 672 | } else if (size == 4) { 673 | printf("movsxd %s, dword ptr[%s]\n", reg_names_64[node->reg], 674 | reg_names_64[node->reg]); 675 | return; 676 | } else if (size == 1) { 677 | printf("movsx %s, byte ptr[%s]\n", reg_names_64[node->reg], 678 | reg_names_64[node->reg]); 679 | return; 680 | } 681 | ErrorWithToken(node->op, "Dereferencing %d bytes is not implemented.", size); 682 | } 683 | 684 | static void GenerateDataSection(struct SymbolEntry *toplevel_names) { 685 | printf(".data\n"); 686 | for (int i = 0; i < 
GetSizeOfList(str_list); i++) { 687 | struct Node *n = GetNodeAt(str_list, i); 688 | printf("L%d: ", n->label_number); 689 | printf(".asciz "); 690 | PrintTokenStrToFile(n->op, stdout); 691 | putchar('\n'); 692 | } 693 | struct SymbolEntry *e = toplevel_names; 694 | for (; e; e = e->prev) { 695 | if (e->type != kSymbolGlobalVar) continue; 696 | int size = GetSizeOfType(e->value); 697 | fprintf(stderr, "Global Var: %s = %d bytes\n", e->key, size); 698 | printf(".global %s%s\n", symbol_prefix, e->key); 699 | printf("%s%s:\n", symbol_prefix, e->key); 700 | printf(".byte "); 701 | for (int i = 0; i < size; i++) { 702 | printf("0%s", i == (size - 1) ? "\n" : ", "); 703 | } 704 | } 705 | } 706 | 707 | void Generate(struct Node *ast, struct SymbolEntry *toplevel_names) { 708 | label_to_break = 0; 709 | label_to_continue = 0; 710 | str_list = AllocList(); 711 | printf(".intel_syntax noprefix\n"); 712 | printf(".text\n"); 713 | GenerateForNode(ast); 714 | GenerateDataSection(toplevel_names); 715 | } 716 | -------------------------------------------------------------------------------- /include/stdarg.h: -------------------------------------------------------------------------------- 1 | #define va_start(ap, param) __builtin_va_start(ap, param) 2 | #define va_end(ap) __builtin_va_end(ap) 3 | #define va_arg(ap, type) __builtin_va_arg(ap, type) 4 | #define va_list __builtin_va_list 5 | -------------------------------------------------------------------------------- /include/stdbool.h: -------------------------------------------------------------------------------- 1 | #define true 1 2 | #define false 0 3 | #define bool _Bool 4 | -------------------------------------------------------------------------------- /include/stdio.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define NULL 0 4 | #define EOF (-1) 5 | 6 | typedef unsigned long size_t; 7 | typedef struct FILE FILE; 8 | 9 | struct FILE; 10 | 11 | #ifdef __APPLE__ 
# Linkage test: builds the same sources twice -- once with the host compiler
# and once through ../compilium -- and runs both binaries.
SRCS=linkage_test.c external.c
ASMS=$(addsuffix .S, $(basename $(SRCS)))

# These names are commands, not files; without this a stray file called
# `test` or `clean` would silently disable the target.
.PHONY: default test run validate format clean

default: test

# Empty target listed as a prerequisite to force its dependents to rebuild
# on every invocation.
.FORCE :

# Validate with the host compiler first, then run the compilium-built binary.
# $(MAKE) instead of a literal `make` re-invokes the same make binary and
# keeps -n and the -j jobserver working in the sub-makes.
test:
	$(MAKE) validate
	$(MAKE) run

run: linkage_test.bin
	./linkage_test.bin

validate: linkage_test.host.bin
	./linkage_test.host.bin

format:
	clang-format -i *.c

linkage_test.host.bin : $(SRCS) .FORCE
	$(CC) -Wall -pedantic -o $@ ${SRCS}

linkage_test.bin : $(ASMS) .FORCE
	$(CC) -Wall -pedantic -o $@ ${ASMS}

# Always ask the parent Makefile whether the compiler itself is up to date.
../compilium : .FORCE
	$(MAKE) -C .. compilium

%.host.S : %.c Makefile ../compilium .FORCE
	$(CC) -S -o $@ $<

%.S : %.c Makefile ../compilium .FORCE
	../compilium --target-os `uname` -I ../include/ < $< > $@

# $(RM) is `rm -f`: no error noise when nothing has been built yet.
clean:
	$(RM) *.bin *.S
// Entry point of the linkage test. Runs the checks on the variable shared
// with external.c first, then the checks on the global defined in this file.
// Returns 0 once both have finished; ExpectEq calls exit(1) on the first
// mismatch, so reaching the return means every check passed.
int main() {
  TestExternal();
  TestGlobal();
  return 0;
}
*op = AllocNode(kASTExpr); 24 | op->op = t; 25 | op->right = ParseExpr(); 26 | if (!op->right) ErrorWithToken(t, "Expected expr after this token"); 27 | ExpectPunctuator(")"); 28 | return op; 29 | } 30 | return NULL; 31 | } 32 | 33 | struct Node *ParseAssignExpr(); 34 | struct Node *ParsePostfixExpr() { 35 | struct Node *n = ParsePrimaryExpr(); 36 | while (n) { 37 | struct Node *t; 38 | if (ConsumePunctuator("(")) { 39 | struct Node *args = AllocList(); 40 | if (!ConsumePunctuator(")")) { 41 | do { 42 | struct Node *arg_expr = ParseAssignExpr(); 43 | if (!arg_expr) 44 | ErrorWithToken(NextToken(), "Expected expression here"); 45 | PushToList(args, arg_expr); 46 | } while (ConsumePunctuator(",")); 47 | ExpectPunctuator(")"); 48 | } 49 | struct Node *nn = AllocNode(kASTExprFuncCall); 50 | nn->func_expr = n; 51 | nn->arg_expr_list = args; 52 | n = nn; 53 | continue; 54 | } 55 | if ((t = ConsumePunctuator("["))) { 56 | n = CreateASTBinOp(t, n, ParseExpr()); 57 | ExpectPunctuator("]"); 58 | continue; 59 | } 60 | if ((t = ConsumePunctuator(".")) || (t = ConsumePunctuator("->"))) { 61 | struct Node *right = ConsumeToken(kTokenIdent); 62 | assert(right); 63 | n = CreateASTBinOp(t, n, right); 64 | continue; 65 | } 66 | if ((t = ConsumePunctuator("++"))) { 67 | n = CreateASTUnaryPostfixOp(n, t); 68 | continue; 69 | } 70 | if ((t = ConsumePunctuator("--"))) { 71 | n = CreateASTUnaryPostfixOp(n, t); 72 | continue; 73 | } 74 | break; 75 | } 76 | return n; 77 | } 78 | 79 | struct Node *ParseUnaryExpr() { 80 | struct Node *t; 81 | if ((t = ConsumePunctuator("+")) || (t = ConsumePunctuator("-")) || 82 | (t = ConsumePunctuator("~")) || (t = ConsumePunctuator("!")) || 83 | (t = ConsumePunctuator("&")) || (t = ConsumePunctuator("*"))) { 84 | return CreateASTUnaryPrefixOp(t, ParseCastExpr()); 85 | } else if ((t = ConsumePunctuator("--")) || (t = ConsumePunctuator("++")) || 86 | (t = ConsumeToken(kTokenKwSizeof))) { 87 | return CreateASTUnaryPrefixOp(t, ParseUnaryExpr()); 88 | } 89 | 
// Parses a multiplicative-expression: cast-expressions joined
// left-associatively by `*`, `/` or `%`. Returns NULL when no leading
// operand is present.
//
// A missing right operand (e.g. `a * ;`) is reported here, in the same way
// ParseAssignExpr does. Otherwise a node with a left but no right operand
// is built, which the generator handles as a postfix unary operator.
struct Node *ParseMulExpr() {
  struct Node *op = ParseCastExpr();
  if (!op) return NULL;
  struct Node *t;
  while ((t = ConsumePunctuator("*")) || (t = ConsumePunctuator("/")) ||
         (t = ConsumePunctuator("%"))) {
    struct Node *rhs = ParseCastExpr();
    if (!rhs) ErrorWithToken(t, "Expected expr after this token");
    op = CreateASTBinOp(t, op, rhs);
  }
  return op;
}
struct Node *op = ParseAndExpr(); 160 | if (!op) return NULL; 161 | struct Node *t; 162 | while ((t = ConsumePunctuator("^"))) { 163 | op = CreateASTBinOp(t, op, ParseAndExpr()); 164 | } 165 | return op; 166 | } 167 | 168 | struct Node *ParseOrExpr() { 169 | struct Node *op = ParseXorExpr(); 170 | if (!op) return NULL; 171 | struct Node *t; 172 | while ((t = ConsumePunctuator("|"))) { 173 | op = CreateASTBinOp(t, op, ParseXorExpr()); 174 | } 175 | return op; 176 | } 177 | 178 | struct Node *ParseBoolAndExpr() { 179 | struct Node *op = ParseOrExpr(); 180 | if (!op) return NULL; 181 | struct Node *t; 182 | while ((t = ConsumePunctuator("&&"))) { 183 | op = CreateASTBinOp(t, op, ParseOrExpr()); 184 | } 185 | return op; 186 | } 187 | 188 | struct Node *ParseBoolOrExpr() { 189 | struct Node *op = ParseBoolAndExpr(); 190 | if (!op) return NULL; 191 | struct Node *t; 192 | while ((t = ConsumePunctuator("||"))) { 193 | op = CreateASTBinOp(t, op, ParseBoolAndExpr()); 194 | } 195 | return op; 196 | } 197 | 198 | struct Node *ParseConditionalExpr() { 199 | struct Node *expr = ParseBoolOrExpr(); 200 | if (!expr) return NULL; 201 | struct Node *t; 202 | if ((t = ConsumePunctuator("?"))) { 203 | struct Node *op = AllocNode(kASTExpr); 204 | op->op = t; 205 | op->cond = expr; 206 | op->left = ParseConditionalExpr(); 207 | if (!op->left) 208 | ErrorWithToken(t, "Expected true-expr for this conditional expr"); 209 | ExpectPunctuator(":"); 210 | op->right = ParseConditionalExpr(); 211 | if (!op->right) 212 | ErrorWithToken(t, "Expected false-expr for this conditional expr"); 213 | return op; 214 | } 215 | return expr; 216 | } 217 | 218 | struct Node *ParseAssignExpr() { 219 | struct Node *left = ParseConditionalExpr(); 220 | if (!left) return NULL; 221 | struct Node *t; 222 | if ((t = ConsumePunctuator("=")) || (t = ConsumePunctuator("+=")) || 223 | (t = ConsumePunctuator("-=")) || (t = ConsumePunctuator("*=")) || 224 | (t = ConsumePunctuator("/=")) || (t = ConsumePunctuator("%=")) 
|| 225 | (t = ConsumePunctuator("<<=")) || (t = ConsumePunctuator(">>="))) { 226 | struct Node *right = ParseAssignExpr(); 227 | if (!right) ErrorWithToken(t, "Expected expr after this token"); 228 | return CreateASTBinOp(t, left, right); 229 | } 230 | return left; 231 | } 232 | 233 | struct Node *ParseExpr() { 234 | struct Node *op = ParseAssignExpr(); 235 | if (!op) return NULL; 236 | struct Node *t; 237 | while ((t = ConsumePunctuator(","))) { 238 | op = CreateASTBinOp(t, op, ParseAssignExpr()); 239 | } 240 | return op; 241 | } 242 | 243 | struct Node *ParseExprStmt() { 244 | struct Node *expr = ParseExpr(); 245 | struct Node *t; 246 | if ((t = ConsumePunctuator(";"))) { 247 | return CreateASTExprStmt(t, expr); 248 | } else if (expr) { 249 | ExpectPunctuator(";"); 250 | } 251 | return NULL; 252 | } 253 | 254 | struct Node *ParseSelectionStmt() { 255 | struct Node *t; 256 | if ((t = ConsumeToken(kTokenKwIf))) { 257 | ExpectPunctuator("("); 258 | struct Node *expr = ParseExpr(); 259 | assert(expr); 260 | ExpectPunctuator(")"); 261 | struct Node *stmt_true = ParseStmt(); 262 | assert(stmt_true); 263 | struct Node *stmt = AllocNode(kASTSelectionStmt); 264 | stmt->op = t; 265 | stmt->cond = expr; 266 | stmt->if_true_stmt = stmt_true; 267 | if (ConsumeToken(kTokenKwElse)) { 268 | stmt->if_else_stmt = ParseStmt(); 269 | } 270 | return stmt; 271 | } 272 | return NULL; 273 | } 274 | 275 | struct Node *ParseJumpStmt() { 276 | // jump-statement 277 | struct Node *t; 278 | if ((t = ConsumeToken(kTokenKwBreak)) || 279 | (t = ConsumeToken(kTokenKwContinue))) { 280 | ExpectPunctuator(";"); 281 | struct Node *stmt = AllocNode(kASTJumpStmt); 282 | stmt->op = t; 283 | return stmt; 284 | } 285 | if ((t = ConsumeToken(kTokenKwReturn))) { 286 | struct Node *expr = ParseExpr(); 287 | ExpectPunctuator(";"); 288 | struct Node *stmt = AllocNode(kASTJumpStmt); 289 | stmt->op = t; 290 | stmt->right = expr; 291 | return stmt; 292 | } 293 | return NULL; 294 | } 295 | 296 | struct Node 
// Parses one statement by trying each statement parser in a fixed order:
// expression, jump, selection, compound, iteration. Returns the first
// non-NULL result, or NULL when none of them matches.
struct Node *ParseStmt() {
  struct Node *parsed = ParseExprStmt();
  if (parsed) return parsed;

  parsed = ParseJumpStmt();
  if (parsed) return parsed;

  parsed = ParseSelectionStmt();
  if (parsed) return parsed;

  parsed = ParseCompStmt();
  if (parsed) return parsed;

  // Last candidate: its result (possibly NULL) is the answer.
  return ParseIterationStmt();
}
| (decl_spec = ConsumeToken(kTokenKwChar)) || 366 | (decl_spec = ConsumeToken(kTokenKwInt)) || 367 | (decl_spec = ConsumeToken(kTokenKwLong)) || 368 | (decl_spec = ConsumeToken(kTokenKwUnsigned))) { 369 | PushToList(decl_specs, decl_spec); 370 | continue; 371 | } 372 | // builtin type name 373 | if ((decl_spec = ConsumeTokenStr("__builtin_va_list"))) { 374 | PushToList(decl_specs, decl_spec); 375 | continue; 376 | } 377 | // typedef name 378 | struct Node *typedef_type = GetNodeByTokenKey(ord_idents, PeekToken()); 379 | if (typedef_type) { 380 | PushToList(decl_specs, typedef_type); 381 | NextToken(); 382 | continue; 383 | } 384 | // struct-or-union-specifier 385 | if (ConsumeToken(kTokenKwStruct)) { 386 | struct Node *struct_spec = AllocNode(kASTStructSpec); 387 | struct_spec->tag = ConsumeToken(kTokenIdent); 388 | assert(struct_spec->tag); 389 | if (ConsumePunctuator("{")) { 390 | struct_spec->struct_member_dict = AllocList(); 391 | struct Node *decl; 392 | while ((decl = ParseDecl())) { 393 | AddMemberOfStructFromDecl(struct_spec, decl); 394 | } 395 | ExpectPunctuator("}"); 396 | } 397 | PushToList(decl_specs, struct_spec); 398 | continue; 399 | } 400 | break; 401 | }; 402 | if (GetSizeOfList(decl_specs) == 0) { 403 | return NULL; 404 | } 405 | return decl_specs; 406 | } 407 | 408 | struct Node *ParseParamDecl(); 409 | struct Node *ParseDecltor(); 410 | struct Node *ParseDirectDecltor() { 411 | // always allow abstract decltors 412 | struct Node *n = NULL; 413 | struct Node *t; 414 | if ((t = ConsumePunctuator("("))) { 415 | n = AllocNode(kASTDirectDecltor); 416 | n->op = t; 417 | n->value = ParseDecltor(); 418 | assert(n->value); 419 | ExpectPunctuator(")"); 420 | } else if ((t = ConsumeToken(kTokenIdent))) { 421 | n = AllocNode(kASTDirectDecltor); 422 | n->op = t; 423 | } 424 | while (true) { 425 | if ((t = ConsumePunctuator("("))) { 426 | struct Node *op = t; 427 | struct Node *args = AllocList(); 428 | if (!ConsumePunctuator(")")) { 429 | while (1) { 430 | 
if ((t = ConsumePunctuator("..."))) { 431 | PushToList(args, t); 432 | } else { 433 | struct Node *arg = ParseParamDecl(); 434 | if (!arg) { 435 | ErrorWithToken(NextToken(), "Expected ParamDecl here"); 436 | } 437 | PushToList(args, arg); 438 | } 439 | if (!ConsumePunctuator(",")) break; 440 | } 441 | ExpectPunctuator(")"); 442 | } 443 | struct Node *nn = AllocNode(kASTDirectDecltor); 444 | nn->op = op; 445 | nn->right = args; 446 | nn->left = n; 447 | n = nn; 448 | } 449 | if ((t = ConsumePunctuator("["))) { 450 | struct Node *nn = AllocNode(kASTDirectDecltor); 451 | nn->op = t; 452 | nn->right = ParseAssignExpr(); 453 | nn->left = n; 454 | n = nn; 455 | ExpectPunctuator("]"); 456 | continue; 457 | } 458 | break; 459 | } 460 | return n; 461 | } 462 | 463 | struct Node *ParseDecltor() { 464 | struct Node *n = AllocNode(kASTDecltor); 465 | struct Node *pointer = NULL; 466 | struct Node *t; 467 | while ((t = ConsumePunctuator("*"))) { 468 | pointer = CreateTypePointer(pointer); 469 | } 470 | n->left = pointer; 471 | n->right = ParseDirectDecltor(); 472 | return n; 473 | } 474 | 475 | struct Node *ParseInitDecltor() { 476 | struct Node *decltor = ParseDecltor(); 477 | if (!decltor) return NULL; 478 | struct Node *t; 479 | if (!(t = ConsumePunctuator("="))) return decltor; 480 | struct Node *init_expr = ParseAssignExpr(); 481 | assert(init_expr); 482 | decltor->decltor_init_expr = CreateASTBinOp(t, NULL, init_expr); 483 | return decltor; 484 | } 485 | 486 | struct Node *ParseParamDecl() { 487 | struct Node *decl_specs = ParseDeclSpecs(); 488 | if (!decl_specs) return NULL; 489 | struct Node *n = AllocNode(kASTDecl); 490 | n->op = decl_specs; 491 | n->right = ParseDecltor(); 492 | return n; 493 | } 494 | 495 | struct Node *ParseDeclBody() { 496 | struct Node *decl_specs = ParseDeclSpecs(); 497 | if (!decl_specs) return NULL; 498 | struct Node *n = AllocNode(kASTDecl); 499 | n->op = decl_specs; 500 | n->right = ParseInitDecltor(); 501 | return n; 502 | } 503 | 504 | 
struct Node *ParseDecl() { 505 | struct Node *decl_body = ParseDeclBody(); 506 | if (!decl_body) return NULL; 507 | ExpectPunctuator(";"); 508 | return decl_body; 509 | } 510 | 511 | struct Node *ParseCompStmt() { 512 | struct Node *t; 513 | if (!(t = ConsumePunctuator("{"))) return NULL; 514 | struct Node *list = AllocList(); 515 | list->op = t; 516 | struct Node *stmt; 517 | while ((stmt = ParseDecl()) || (stmt = ParseStmt())) { 518 | PushToList(list, stmt); 519 | } 520 | ExpectPunctuator("}"); 521 | return list; 522 | } 523 | 524 | struct Node *ParseFuncDef(struct Node *decl_body) { 525 | struct Node *comp_stmt = ParseCompStmt(); 526 | if (!comp_stmt) return NULL; 527 | return CreateASTFuncDef(decl_body, comp_stmt); 528 | } 529 | 530 | void InitParser(struct Node **head_token) { 531 | InitTokenStream(RemoveDelimiterTokens(head_token)); 532 | ord_idents = AllocList(); 533 | } 534 | 535 | struct Node *Parse(struct Node **head_token) { 536 | InitParser(head_token); 537 | struct Node *list = AllocList(); 538 | struct Node *decl_body; 539 | while ((decl_body = ParseDeclBody())) { 540 | if (ConsumePunctuator(";")) { 541 | PushToList(list, decl_body); 542 | assert(IsASTList(decl_body->op)); 543 | if (IsASTDeclOfTypedef(decl_body)) { 544 | // typedef case 545 | struct Node *typedef_type = CreateTypeFromDecl(decl_body); 546 | struct Node *typedef_name = 547 | GetIdentifierTokenFromTypeAttr(typedef_type); 548 | PrintASTNode(typedef_name); 549 | PushKeyValueToList(ord_idents, CreateTokenStr(typedef_name), 550 | GetTypeWithoutAttr(typedef_type)); 551 | } 552 | continue; 553 | } 554 | struct Node *func_def = ParseFuncDef(decl_body); 555 | if (!func_def) { 556 | ErrorWithToken(NextToken(), "Unexpected token"); 557 | } 558 | PushToList(list, func_def); 559 | } 560 | struct Node *t; 561 | if (!(t = NextToken())) return list; 562 | ErrorWithToken(t, "Unexpected token"); 563 | } 564 | -------------------------------------------------------------------------------- 
/preprocessor.c: -------------------------------------------------------------------------------- 1 | #include "compilium.h" 2 | 3 | static struct Node *SkipDelimiterTokensInLogicalLine(struct Node *t) { 4 | while (t && t->token_type == kTokenDelimiter && 5 | !IsEqualTokenWithCStr(t, "\n")) 6 | t = t->next_token; 7 | return t; 8 | } 9 | 10 | static const char *CreateStrFromTokenRange(struct Node *begin, 11 | struct Node *end) { 12 | assert(begin); 13 | int len = 0; 14 | for (struct Node *t = begin; t && t != end; t = t->next_token) { 15 | len += t->length; 16 | } 17 | return strndup(begin->begin, len); 18 | } 19 | 20 | static void PreprocessRemoveBlock(void) { 21 | for (struct Node *t = PeekToken(); t; t = t->next_token) { 22 | if (!IsEqualTokenWithCStr(t, "#")) { 23 | continue; 24 | } 25 | t = SkipDelimiterTokensInLogicalLine(t->next_token); 26 | if (!IsEqualTokenWithCStr(t, "endif") && !IsEqualTokenWithCStr(t, "else")) { 27 | continue; 28 | } 29 | RemoveTokensTo(t); 30 | break; 31 | } 32 | } 33 | 34 | static struct Node *TryReadIdentListWrappedByParens(struct Node **tp) { 35 | // If ( ident_list ) is read, this function returns cloned tokens of 36 | // ident_list without commas and tp is advanced to next token. 37 | // If not, this function returns NULL and tp is unchanged. 
38 | struct Node *t = *tp; 39 | if (!IsEqualTokenWithCStr(t, "(")) { 40 | return NULL; 41 | } 42 | struct Node *ident_list_head = NULL; 43 | struct Node **ident_list_last_holder = &ident_list_head; 44 | for (t = SkipDelimiterTokensInLogicalLine(t->next_token); t; 45 | t = SkipDelimiterTokensInLogicalLine(t->next_token)) { 46 | if (IsEqualTokenWithCStr(t, ")")) break; 47 | *ident_list_last_holder = DuplicateToken(t); 48 | ident_list_last_holder = &(*ident_list_last_holder)->next_token; 49 | t = SkipDelimiterTokensInLogicalLine(t->next_token); 50 | if (!IsEqualTokenWithCStr(t, ",")) break; 51 | } 52 | if (!IsEqualTokenWithCStr(t, ")")) { 53 | return NULL; 54 | } 55 | // To distinguish function-like macro with zero args and 56 | // token level replacement macro, add ) at the end of args 57 | // to ensure args is not NULL 58 | *ident_list_last_holder = DuplicateToken(t); 59 | *tp = SkipDelimiterTokensInLogicalLine(t->next_token); 60 | return ident_list_head; 61 | } 62 | 63 | static char *CreateJoinedString(const char *s1, const char *s2) { 64 | assert(s1 && s2); 65 | char *s = malloc(strlen(s1) + strlen(s2) + 1); 66 | assert(s); 67 | strcpy(s, s1); 68 | strcat(s, s2); 69 | return s; 70 | } 71 | 72 | static void PreprocessBlock(struct Node *replacement_list, int level) { 73 | struct Node *t; 74 | while (PeekToken()) { 75 | if ((t = ConsumeTokenStr("__LINE__"))) { 76 | char s[32]; 77 | snprintf(s, sizeof(s), "%d", t->line); 78 | t->token_type = kTokenIntegerConstant; 79 | t->begin = t->src_str = strdup(s); 80 | t->length = strlen(t->begin); 81 | continue; 82 | } 83 | if ((t = ReadToken(kTokenLineComment))) { 84 | while (t && !IsEqualTokenWithCStr(t, "\n")) t = t->next_token; 85 | RemoveTokensTo(t); 86 | continue; 87 | } 88 | if ((t = ReadToken(kTokenBlockCommentBegin))) { 89 | while (t && !IsTokenWithType(t, kTokenBlockCommentEnd)) t = t->next_token; 90 | if (IsTokenWithType(t, kTokenBlockCommentEnd)) t = t->next_token; 91 | RemoveTokensTo(t); 92 | continue; 93 | } 94 | 
if (IsEqualTokenWithCStr((t = PeekToken()), "#")) { 95 | assert(t); 96 | t = SkipDelimiterTokensInLogicalLine(t->next_token); 97 | if (IsEqualTokenWithCStr(t, "define")) { 98 | assert(t); 99 | t = SkipDelimiterTokensInLogicalLine(t->next_token); 100 | struct Node *from = t; 101 | t = t->next_token; 102 | struct Node *ident_list = TryReadIdentListWrappedByParens(&t); 103 | t = SkipDelimiterTokensInLogicalLine(t); 104 | assert(t); 105 | struct Node *to_token_head = NULL; 106 | struct Node **to_token_last_holder = &to_token_head; 107 | while (t && !IsEqualTokenWithCStr(t, "\n")) { 108 | *to_token_last_holder = DuplicateToken(t); 109 | to_token_last_holder = &(*to_token_last_holder)->next_token; 110 | t = t->next_token; 111 | } 112 | assert(IsEqualTokenWithCStr(t, "\n")); 113 | RemoveTokensTo(t->next_token); 114 | PushKeyValueToList(replacement_list, CreateTokenStr(from), 115 | CreateMacroReplacement(ident_list, to_token_head)); 116 | continue; 117 | } 118 | if (IsEqualTokenWithCStr(t, "include")) { 119 | struct Node *token_include = t; 120 | const char *fname = NULL; 121 | const char *path = NULL; 122 | t = SkipDelimiterTokensInLogicalLine(t->next_token); 123 | if (IsTokenWithType(t, kTokenStringLiteral)) { 124 | char *tmp_fname = CreateTokenStr(t); 125 | tmp_fname++; // Remove open " 126 | tmp_fname[strlen(tmp_fname) - 1] = 0; // Remove close " 127 | fname = tmp_fname; 128 | RemoveTokensTo(t->next_token); 129 | path = CreateJoinedString( 130 | "./", fname); // TODO: Make this relative to source, not cwd. 131 | } else if (IsEqualTokenWithCStr(t, "<")) { 132 | struct Node *markL = t; 133 | t = t->next_token; 134 | struct Node *begin = t; 135 | while (t && !IsEqualTokenWithCStr(t, ">")) { 136 | t = t->next_token; 137 | } 138 | if (!t) { 139 | ErrorWithToken(markL, 140 | "Unexpected EOF. 
> is expected to match with this."); 141 | } 142 | struct Node *end = t; 143 | fname = CreateStrFromTokenRange(begin, end); 144 | RemoveTokensTo(end->next_token); 145 | if (!include_path) { 146 | ErrorWithToken(token_include, 147 | "Include path is not provided in compiler args"); 148 | } 149 | path = CreateJoinedString(include_path, fname); 150 | } else { 151 | ErrorWithToken(t, "Expected < or \" here"); 152 | } 153 | assert(path); 154 | fprintf(stderr, "Include from: %s\n", path); 155 | FILE *fp = fopen(path, "rb"); 156 | if (!fp) { 157 | ErrorWithToken(token_include, "File not found: %s", path); 158 | } 159 | const char *include_input = ReadFile(fp); 160 | InsertTokens(Tokenize(include_input)); 161 | fclose(fp); 162 | continue; 163 | } 164 | if (IsEqualTokenWithCStr(t, "ifdef")) { 165 | struct Node *ifdef_token = t; 166 | t = SkipDelimiterTokensInLogicalLine(t->next_token); 167 | struct Node *e; 168 | bool cond = (e = GetNodeByTokenKey(replacement_list, t)); 169 | // defined 170 | t = SkipDelimiterTokensInLogicalLine(t->next_token); 171 | RemoveTokensTo(t); 172 | if (cond) { 173 | PreprocessBlock(replacement_list, level + 1); 174 | if (IsEqualTokenWithCStr(PeekToken(), "else")) { 175 | RemoveCurrentToken(); 176 | PreprocessRemoveBlock(); 177 | } 178 | } else { 179 | PreprocessRemoveBlock(); 180 | if (IsEqualTokenWithCStr(PeekToken(), "else")) { 181 | RemoveCurrentToken(); 182 | PreprocessBlock(replacement_list, level + 1); 183 | } 184 | } 185 | t = PeekToken(); 186 | if (!IsEqualTokenWithCStr(t, "endif")) { 187 | ErrorWithToken(ifdef_token, 188 | "Unexpected eof. 
Expected #endif to match with this."); 189 | } 190 | t = SkipDelimiterTokensInLogicalLine(t->next_token); 191 | RemoveTokensTo(t); 192 | continue; 193 | } 194 | if (IsEqualTokenWithCStr(t, "endif")) { 195 | if (level == 0) { 196 | ErrorWithToken(t, "Unexpected endif here"); 197 | } 198 | RemoveTokensTo(t); 199 | return; 200 | } 201 | if (IsEqualTokenWithCStr(t, "else")) { 202 | if (level == 0) { 203 | ErrorWithToken(t, "Unexpected else here"); 204 | } 205 | RemoveTokensTo(t); 206 | return; 207 | } 208 | ErrorWithToken(NextToken(), "Not a valid macro"); 209 | } 210 | struct Node *e; 211 | if ((e = GetNodeByTokenKey(replacement_list, (t = PeekToken())))) { 212 | assert(e->type == kNodeMacroReplacement); 213 | struct Node *rep = DuplicateTokenSequence(e->value); 214 | RemoveCurrentToken(); 215 | if (!e->arg_expr_list) { 216 | // ident replace macro case 217 | InsertTokens(rep); 218 | continue; 219 | } 220 | // function-like macro case 221 | t = SkipDelimiterTokensInLogicalLine(t->next_token); 222 | if (!IsEqualTokenWithCStr(t, "(")) ErrorWithToken(t, "Expected ( here"); 223 | t = t->next_token; 224 | struct Node *it; 225 | struct Node *arg_rep_list = AllocList(); 226 | for (it = e->arg_expr_list; it; it = it->next_token) { 227 | if (IsEqualTokenWithCStr(it, ")")) break; 228 | struct Node *arg_token_head = NULL; 229 | struct Node **arg_token_last_holder = &arg_token_head; 230 | t = SkipDelimiterTokensInLogicalLine(t); 231 | for (; t; t = t->next_token) { 232 | if (IsEqualTokenWithCStr(t, ")") || IsEqualTokenWithCStr(t, ",")) 233 | break; 234 | *arg_token_last_holder = DuplicateToken(t); 235 | arg_token_last_holder = &(*arg_token_last_holder)->next_token; 236 | } 237 | PushKeyValueToList(arg_rep_list, CreateTokenStr(it), 238 | CreateMacroReplacement(NULL, arg_token_head)); 239 | if (IsEqualTokenWithCStr(t, ")")) break; 240 | t = t->next_token; 241 | } 242 | if (!IsEqualTokenWithCStr(t, ")")) ErrorWithToken(t, "Expected ) here"); 243 | RemoveTokensTo(t->next_token); 244 
| // Insert & replace args 245 | InsertTokensWithIdentReplace(rep, arg_rep_list); 246 | continue; 247 | } 248 | NextToken(); 249 | } 250 | } 251 | 252 | void Preprocess(struct Node **head_holder, struct Node *replacement_list) { 253 | InitTokenStream(head_holder); 254 | PreprocessBlock(replacement_list, 0); 255 | } 256 | -------------------------------------------------------------------------------- /struct.c: -------------------------------------------------------------------------------- 1 | #include "compilium.h" 2 | 3 | static int CalcStructSizeFromDict(struct Node *dict) { 4 | if (!GetSizeOfList(dict)) return 0; 5 | struct Node *last_kv = GetNodeAt(dict, GetSizeOfList(dict) - 1); 6 | struct Node *last_member = last_kv->value; 7 | return last_member->struct_member_ent_ofs + 8 | GetSizeOfType(last_member->struct_member_ent_type); 9 | } 10 | 11 | static int CalcNextMemberOffset(struct Node *dict, struct Node *type) { 12 | if (!GetSizeOfList(dict)) return 0; 13 | int align = GetAlignOfType(type); 14 | return (CalcStructSizeFromDict(dict) + align - 1) / align * align; 15 | } 16 | 17 | int CalcStructSize(struct Node *spec) { 18 | assert(spec && spec->type == kASTStructSpec); 19 | return CalcStructSizeFromDict(spec->struct_member_dict); 20 | } 21 | 22 | static int CalcStructAlignFromDict(struct Node *dict) { 23 | assert(dict && dict->type == kASTList); 24 | int align = 1; 25 | for (int i = 0; i < GetSizeOfList(dict); i++) { 26 | struct Node *kv = GetNodeAt(dict, i); 27 | struct Node *member_info = kv->value; 28 | assert(member_info->struct_member_ent_type); 29 | int member_align = GetAlignOfType(member_info->struct_member_ent_type); 30 | if (align < member_align) align = member_align; 31 | } 32 | return align; 33 | } 34 | 35 | int CalcStructAlign(struct Node *spec) { 36 | assert(spec && spec->type == kASTStructSpec); 37 | return CalcStructAlignFromDict(spec->struct_member_dict); 38 | } 39 | 40 | void AddMemberOfStructFromDecl(struct Node *struct_spec, struct Node 
*decl) { 41 | struct Node *struct_member = AllocNode(kNodeStructMember); 42 | struct_member->struct_member_decl = decl; 43 | struct Node *type = CreateTypeFromDecl(decl); 44 | assert(type && type->left); 45 | const char *name = CreateTokenStr(type->left); 46 | struct Node *dict = struct_spec->struct_member_dict; 47 | PushKeyValueToList(dict, name, struct_member); 48 | } 49 | 50 | struct Node *FindStructMember(struct Node *struct_type, 51 | struct Node *key_token) { 52 | assert(key_token->type == kNodeToken); 53 | struct_type = GetTypeWithoutAttr(struct_type); 54 | assert(struct_type && struct_type->type == kTypeStruct); 55 | assert(struct_type->type_struct_spec); 56 | assert(struct_type->type_struct_spec->struct_member_dict); 57 | assert(struct_type->type_struct_spec->struct_member_dict->type == kASTList); 58 | return GetNodeByTokenKey(struct_type->type_struct_spec->struct_member_dict, 59 | key_token); 60 | } 61 | 62 | void ResolveTypesOfMembersOfStruct(struct SymbolEntry *ctx, struct Node *spec) { 63 | if (!spec) { 64 | // Skip resolving members since it is incomplete. 
65 | return; 66 | } 67 | struct Node *dict = spec->struct_member_dict; 68 | fprintf(stderr, "Resolving types of struct...\n"); 69 | struct Node *resolved_dict = AllocList(); 70 | for (int i = 0; i < GetSizeOfList(dict); i++) { 71 | struct Node *kv = GetNodeAt(dict, i); 72 | struct Node *member_info = kv->value; 73 | struct Node *type = 74 | CreateTypeFromDeclInContext(ctx, member_info->struct_member_decl); 75 | assert(type && type->left); 76 | member_info->struct_member_ent_type = GetTypeWithoutAttr(type); 77 | member_info->struct_member_ent_ofs = 78 | CalcNextMemberOffset(resolved_dict, type); 79 | PrintASTNode(member_info); 80 | PushKeyValueToList(resolved_dict, kv->key, kv->value); 81 | } 82 | spec->struct_member_dict = resolved_dict; 83 | } 84 | -------------------------------------------------------------------------------- /symbol.c: -------------------------------------------------------------------------------- 1 | #include "compilium.h" 2 | 3 | static void PushSymbol(struct SymbolEntry **prev, struct SymbolEntry *sym) { 4 | sym->prev = *prev; 5 | *prev = sym; 6 | } 7 | 8 | static struct SymbolEntry *AllocSymbolEntry(enum SymbolType type, 9 | const char *key, 10 | struct Node *value) { 11 | struct SymbolEntry *e = calloc(1, sizeof(struct SymbolEntry)); 12 | e->type = type; 13 | e->key = key; 14 | e->value = value; 15 | return e; 16 | } 17 | 18 | int GetLastLocalVarOffset(struct SymbolEntry *e) { 19 | for (; e; e = e->prev) { 20 | if (e->type != kSymbolLocalVar) continue; 21 | assert(e->value && e->value->type == kASTLocalVar); 22 | return e->value->byte_offset; 23 | } 24 | return 0; 25 | } 26 | 27 | struct Node *AddLocalVar(struct SymbolEntry **ctx, const char *key, 28 | struct Node *var_type) { 29 | assert(ctx); 30 | int ofs = GetLastLocalVarOffset(*ctx); 31 | ofs += GetSizeOfType(var_type); 32 | int align = GetSizeOfType(var_type); 33 | ofs = (ofs + align - 1) / align * align; 34 | struct Node *local_var = CreateASTLocalVar(ofs, var_type); 35 | struct 
SymbolEntry *e = AllocSymbolEntry(kSymbolLocalVar, key, local_var); 36 | PushSymbol(ctx, e); 37 | return local_var; 38 | } 39 | 40 | void AddGlobalVar(struct SymbolEntry **ctx, const char *key, 41 | struct Node *var_type) { 42 | fprintf(stderr, "Gvar: %s: ", key); 43 | PrintASTNode(var_type); 44 | fprintf(stderr, "\n"); 45 | assert(ctx); 46 | struct SymbolEntry *e = AllocSymbolEntry(kSymbolGlobalVar, key, var_type); 47 | PushSymbol(ctx, e); 48 | } 49 | void AddExternVar(struct SymbolEntry **ctx, const char *key, 50 | struct Node *var_type) { 51 | fprintf(stderr, "Evar: %s: ", key); 52 | PrintASTNode(var_type); 53 | fprintf(stderr, "\n"); 54 | assert(ctx); 55 | struct SymbolEntry *e = AllocSymbolEntry(kSymbolExternVar, key, var_type); 56 | PushSymbol(ctx, e); 57 | } 58 | 59 | struct Node *FindExternVar(struct SymbolEntry *e, struct Node *key_token) { 60 | // returns ASTNode which represents Type 61 | for (; e; e = e->prev) { 62 | if (e->type != kSymbolExternVar) continue; 63 | if (!IsEqualTokenWithCStr(key_token, e->key)) continue; 64 | return e->value; 65 | } 66 | return NULL; 67 | } 68 | 69 | struct Node *FindGlobalVar(struct SymbolEntry *e, struct Node *key_token) { 70 | // returns ASTNode which represents Type 71 | for (; e; e = e->prev) { 72 | if (e->type != kSymbolGlobalVar) continue; 73 | if (!IsEqualTokenWithCStr(key_token, e->key)) continue; 74 | return e->value; 75 | } 76 | return NULL; 77 | } 78 | 79 | struct Node *FindLocalVar(struct SymbolEntry *e, struct Node *key_token) { 80 | for (; e; e = e->prev) { 81 | if (e->type != kSymbolLocalVar) continue; 82 | if (!IsEqualTokenWithCStr(key_token, e->key)) continue; 83 | return e->value; 84 | } 85 | return NULL; 86 | } 87 | 88 | void AddFuncDef(struct SymbolEntry **ctx, const char *key, 89 | struct Node *func_def) { 90 | assert(ctx); 91 | struct SymbolEntry *e = AllocSymbolEntry(kSymbolFuncDef, key, func_def); 92 | PushSymbol(ctx, e); 93 | } 94 | 95 | struct Node *FindFuncDef(struct SymbolEntry *e, struct Node 
*key_token) { 96 | for (; e; e = e->prev) { 97 | if (e->type != kSymbolFuncDef) continue; 98 | if (!IsEqualTokenWithCStr(key_token, e->key)) continue; 99 | return e->value; 100 | } 101 | return NULL; 102 | } 103 | 104 | void AddFuncDeclType(struct SymbolEntry **ctx, const char *key, 105 | struct Node *func_decl) { 106 | assert(ctx); 107 | struct SymbolEntry *e = AllocSymbolEntry(kSymbolFuncDeclType, key, func_decl); 108 | PushSymbol(ctx, e); 109 | } 110 | 111 | struct Node *FindFuncDeclType(struct SymbolEntry *e, struct Node *key_token) { 112 | for (; e; e = e->prev) { 113 | if (e->type != kSymbolFuncDeclType) continue; 114 | if (!IsEqualTokenWithCStr(key_token, e->key)) continue; 115 | return e->value; 116 | } 117 | return NULL; 118 | } 119 | 120 | void AddStructType(struct SymbolEntry **ctx, const char *key, 121 | struct Node *type) { 122 | assert(ctx); 123 | struct SymbolEntry *e = AllocSymbolEntry(kSymbolStructType, key, type); 124 | PushSymbol(ctx, e); 125 | PrintASTNode(type); 126 | } 127 | 128 | struct Node *FindStructType(struct SymbolEntry *e, struct Node *key_token) { 129 | for (; e; e = e->prev) { 130 | if (e->type != kSymbolStructType) continue; 131 | if (!IsEqualTokenWithCStr(key_token, e->key)) continue; 132 | return e->value; 133 | } 134 | return NULL; 135 | } 136 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | function test_result { 4 | input="$1" 5 | expected="$2" 6 | expected_stdout="$3" 7 | testname="$4" 8 | printf "$expected_stdout" > expected.stdout 9 | ./compilium --target-os `uname` <<< "$input" > out.S || { \ 10 | echo "$input" > failcase.c; \ 11 | echo "Compilation failed."; \ 12 | exit 1; } 13 | gcc out.S 14 | actual=0 15 | ./a.out > out.stdout || actual=$? 
16 | if [ $expected = $actual ]; then 17 | diff -u expected.stdout out.stdout \ 18 | && echo "PASS $testname returns $expected" \ 19 | || { echo "FAIL $testname: stdout diff"; exit 1; } 20 | else 21 | echo "FAIL $testname: expected $expected but got $actual"; echo $input > failcase.c; exit 1; 22 | fi 23 | } 24 | 25 | function test_expr_result { 26 | test_result "int main(){return $1;}" "$2" "" "$1" 27 | } 28 | 29 | function test_stmt_result { 30 | test_result "int main(){$1}" "$2" "" "$1" 31 | } 32 | 33 | function test_src_result { 34 | test_result "$1" "$2" "$3" "$1" 35 | } 36 | 37 | # nested func call with args 38 | test_src_result "`cat << EOS 39 | int g() { 40 | int a; 41 | a = 3; 42 | int b; 43 | b = 5; 44 | return a + b; 45 | } 46 | 47 | int f() { 48 | int v; 49 | v = 2; 50 | int r; 51 | r = g(); 52 | return v + r; 53 | } 54 | 55 | int main() { 56 | return f(); 57 | } 58 | EOS 59 | `" 10 '' 60 | 61 | # func args should be visible 62 | test_src_result "`cat << EOS 63 | int sum(int a, int b) { 64 | return a + b; 65 | } 66 | 67 | int main() { 68 | return sum(3, 5); 69 | } 70 | EOS 71 | `" 8 '' 72 | 73 | # same symbols in diffrent scope shadows previous one 74 | test_src_result "`cat << EOS 75 | int main() { 76 | int result; 77 | result = 1; 78 | int duplicated_var; 79 | duplicated_var = 2; 80 | result = result * duplicated_var; 81 | if(1) { 82 | int duplicated_var; 83 | duplicated_var = 3; 84 | result = result * duplicated_var; 85 | } 86 | result = result * duplicated_var; 87 | return result; 88 | } 89 | EOS 90 | `" 12 '' 91 | 92 | # for stmt 93 | test_src_result "`cat << EOS 94 | int main() { 95 | int i; 96 | int sum; 97 | sum = 0; 98 | for(i = 0; i <= 10; i = i + 1) { 99 | sum = sum + i; 100 | } 101 | return sum; 102 | } 103 | EOS 104 | `" 55 '' 105 | 106 | # return with no expression 107 | test_src_result "`cat << EOS 108 | void func_returns_void() { 109 | return; 110 | } 111 | int main() { 112 | func_returns_void(); 113 | return 3; 114 | } 115 | EOS 116 | `" 
3 '' 117 | 118 | test_src_result "`cat << EOS 119 | int three() { 120 | return 3; 121 | } 122 | int main() { 123 | return three(); 124 | } 125 | EOS 126 | `" 3 '' 127 | 128 | test_src_result "`cat << EOS 129 | int puts(char *s); 130 | int main() { 131 | puts("Hello, world!"); 132 | return 0; 133 | } 134 | EOS 135 | `" 0 'Hello, world!\n' 136 | 137 | test_src_result "`cat << EOS 138 | int putchar(int c); 139 | int main() { 140 | putchar('C'); 141 | return 0; 142 | } 143 | EOS 144 | `" 0 'C' 145 | 146 | test_stmt_result 'int a; int b; int c; a = 3; b = 5; c = 7; return a + b + c;' 15 147 | 148 | # Non-printable 149 | test_expr_result ' 0 ' 0 150 | 151 | # Unary Prefix 152 | test_expr_result '+ +1' 1 153 | test_expr_result '- -17' 17 154 | 155 | echo "All tests passed." 156 | -------------------------------------------------------------------------------- /test_preprocess.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | function test_stdout { 4 | input="$1" 5 | expected_stdout="$2" 6 | testname="$3" 7 | printf "%s" "$expected_stdout" > expected.stdout 8 | printf "%s" "$input" > testinput.c 9 | cat testinput.c | ./compilium -E --target-os `uname` > out.stdout || { \ 10 | echo "$input" > failcase.txt; \ 11 | echo "Compilation failed."; \ 12 | exit 1; } 13 | diff -y expected.stdout out.stdout \ 14 | && printf "\nPASS $testname\n" \ 15 | || { printf "\nFAIL $testname: stdout diff\n"; exit 1; } 16 | } 17 | 18 | test_stdout \ 19 | "`cat << EOS 20 | #define MACRO_DEFINED 21 | #ifdef MACRO_DEFINED 22 | int this_should_be_visible_1; 23 | #else 24 | int this_should_not_be_visible_1; 25 | #endif 26 | #ifdef MACRO_NOT_DEFINED 27 | int this_should_not_be_visible_2; 28 | #else 29 | int this_should_be_visible_2; 30 | #endif 31 | int always_visible; 32 | EOS 33 | `" \ 34 | "`cat << EOS 35 | 36 | int this_should_be_visible_1; 37 | 38 | 39 | int this_should_be_visible_2; 40 | 41 | int always_visible; 42 | 43 | EOS 44 | `" \ 
45 | 'ifdef nested case' 46 | 47 | test_stdout \ 48 | "`cat << EOS 49 | #define MACRO_DEFINED 50 | #ifdef MACRO_DEFINED 51 | int this_should_be_visible; 52 | #ifdef MACRO_DEFINED 53 | #ifdef MACRO_NOT_DEFINED 54 | int this_is_not_visible; 55 | #endif 56 | int this_is_also_visible; 57 | #endif 58 | #endif 59 | int always_visible; 60 | EOS 61 | `" \ 62 | "`cat << EOS 63 | 64 | int this_should_be_visible; 65 | 66 | 67 | int this_is_also_visible; 68 | 69 | 70 | int always_visible; 71 | 72 | EOS 73 | `" \ 74 | 'ifdef nested case' 75 | 76 | test_stdout \ 77 | "`cat << EOS 78 | #ifdef MACRO_NOT_DEFINED 79 | int this_is_not_visible; 80 | #endif 81 | int always_visible; 82 | EOS 83 | `" \ 84 | "`cat << EOS 85 | 86 | int always_visible; 87 | 88 | EOS 89 | `" \ 90 | 'ifdef not defined case' 91 | 92 | test_stdout \ 93 | "`cat << EOS 94 | #define MACRO_DEFINED 95 | #ifdef MACRO_DEFINED 96 | int this_should_be_visible; 97 | #endif 98 | int always_visible; 99 | EOS 100 | `" \ 101 | "`cat << EOS 102 | 103 | int this_should_be_visible; 104 | 105 | int always_visible; 106 | 107 | EOS 108 | `" \ 109 | 'ifdef defined case' 110 | 111 | test_stdout \ 112 | "`cat << EOS 113 | EOS 114 | `" \ 115 | "`cat << EOS 116 | EOS 117 | `" \ 118 | 'empty input becomes empty' 119 | 120 | test_stdout \ 121 | "`cat << EOS 122 | int one; 123 | 124 | int two; 125 | int three; 126 | EOS 127 | `" \ 128 | "`cat << EOS 129 | int one; 130 | 131 | int two; 132 | int three; 133 | EOS 134 | `" \ 135 | 'keep white spaces and new lines' 136 | 137 | # 6.10.8.1 Mandatory macros - 1 138 | # 5.1.1.2 Translation phases - 2 139 | # 140 | # $ printf 'one\\\n __LINE__ two' | clang -E - 141 | # one 2 two 142 | # $ printf 'one\n __LINE__ two' | clang -E - 143 | # one 144 | # 2 two 145 | 146 | test_stdout \ 147 | "`cat << EOS 148 | one\\\\ 149 | __LINE__ two 150 | EOS 151 | `" \ 152 | "`cat << EOS 153 | one 2 two 154 | EOS 155 | `" \ 156 | '__LINE__ macro shows physical source line, not logical one.' 
157 | 158 | test_stdout \ 159 | "`cat << EOS 160 | one 161 | __LINE__ two 162 | three__LINE__ 163 | four __LINE__ __LINE__ 164 | EOS 165 | `" \ 166 | "`cat << EOS 167 | one 168 | 2 two 169 | three__LINE__ 170 | four 4 4 171 | EOS 172 | `" \ 173 | '__LINE__ macro' 174 | 175 | test_stdout \ 176 | "`cat << EOS 177 | #define hello "Hello, world!" 178 | printf(hello); 179 | EOS 180 | `" \ 181 | "`cat << EOS 182 | printf("Hello, world!"); 183 | EOS 184 | `" \ 185 | 'Simple macro replacement' 186 | 187 | test_stdout \ 188 | "`cat << EOS 189 | #define hello printf("Hello, world!") 190 | hello; 191 | #define EOF (-1) 192 | EOF; 193 | EOS 194 | `" \ 195 | "`cat << EOS 196 | printf("Hello, world!"); 197 | (-1); 198 | EOS 199 | `" \ 200 | 'Simple macro replacement with multiple tokens' 201 | 202 | test_stdout \ 203 | "`cat << EOS 204 | #define hello 205 | hello; 206 | EOS 207 | `" \ 208 | "`cat << EOS 209 | ; 210 | EOS 211 | `" \ 212 | 'Simple macro replacement with zero tokens' 213 | 214 | test_stdout \ 215 | "`cat << EOS 216 | #define f0() printf("Zero") 217 | #define f1(a) printf("One %d", a) 218 | #define f2(a, b) printf("Two %d %d", a, b) 219 | f0(); 220 | f1(1 + 1); 221 | f2(1 + 1, 3); 222 | EOS 223 | `" \ 224 | "`cat << EOS 225 | printf("Zero"); 226 | printf("One %d", 1 + 1); 227 | printf("Two %d %d", 1 + 1, 3); 228 | EOS 229 | `" \ 230 | 'Function-like macros' 231 | 232 | test_stdout \ 233 | "`cat << EOS 234 | #define f1(a) printf("One %s", #a) 235 | #define f2(a, b) printf("Two %s %d", #a, b) 236 | f1(1 + 1); 237 | f2(1 + 1, 3); 238 | EOS 239 | `" \ 240 | "`cat << EOS 241 | printf("One %s", "1 + 1"); 242 | printf("Two %s %d", "1 + 1", 3); 243 | EOS 244 | `" \ 245 | 'Function-like macros with #expr macro' 246 | -------------------------------------------------------------------------------- /token.c: --------------------------------------------------------------------------------
#include "compilium.h"

// Returns true when n is a non-NULL token node.
bool IsToken(struct Node *n) { return n && n->type == kNodeToken; }

// Returns true when n is a token node of the given token type.
bool IsTokenWithType(struct Node *n, enum TokenType token_type) {
  return IsToken(n) && n->token_type == token_type;
}

// Allocates a token node.
//   src_str: the source text the token belongs to
//   line:    line number recorded for the token
//   begin:   first character of the token text
//   length:  number of characters of the token text
//   type:    token type
// The text is referenced, not copied.
struct Node *AllocToken(const char *src_str, int line, const char *begin,
                        int length, enum TokenType type) {
  struct Node *t = AllocNode(kNodeToken);
  t->begin = begin;
  t->length = length;
  t->token_type = type;
  t->src_str = src_str;
  t->line = line;
  return t;
}

// Returns a copy of base_token that is not linked to any following token.
struct Node *DuplicateToken(struct Node *base_token) {
  assert(IsToken(base_token));
  struct Node *t = AllocNode(kNodeToken);
  memcpy(t, base_token, sizeof(*t));
  t->next_token = NULL;
  return t;
}

// Returns a copy of the whole token list starting at base_head
// (NULL for an empty list).
struct Node *DuplicateTokenSequence(struct Node *base_head) {
  struct Node *dup_head = NULL;
  struct Node **dup_head_holder = &dup_head;
  while (base_head) {
    *dup_head_holder = DuplicateToken(base_head);
    dup_head_holder = &(*dup_head_holder)->next_token;
    base_head = base_head->next_token;
  }
  return dup_head;
}

// Returns a newly allocated, NUL-terminated copy of the token text.
char *CreateTokenStr(struct Node *t) {
  assert(IsToken(t));
  return strndup(t->begin, t->length);
}

// Returns non-zero when t is a token whose text is exactly s.
// A NULL or non-token t compares unequal.
int IsEqualTokenWithCStr(struct Node *t, const char *s) {
  return IsToken(t) && strlen(s) == (unsigned)t->length &&
         strncmp(t->begin, s, t->length) == 0;
}

// Writes the text of every token of the list to fp, skipping zero width
// no-break space tokens. Does nothing for an empty list.
static void WriteTokenSequence(struct Node *t, FILE *fp) {
  if (!t) return;
  assert(IsToken(t));
  for (; t; t = t->next_token) {
    if (t->token_type == kTokenZeroWidthNoBreakSpace) {
      continue;
    }
    fprintf(fp, "%.*s", t->length, t->begin);
  }
}

// Prints the token list to stderr (for debugging).
void PrintTokenSequence(struct Node *t) { WriteTokenSequence(t, stderr); }

// Prints the token list to stdout as C source text.
void OutputTokenSequenceAsCSource(struct Node *t) {
  WriteTokenSequence(t, stdout);
}

// Prints one token with its type to stderr.
void PrintToken(struct Node *t) {
  fprintf(stderr, "(Token %.*s type=%d)", t->length, t->begin, t->token_type);
}

// Prints one token to stderr in short form: string and character literals as
// they are, every other token wrapped in <>.
void PrintTokenBrief(struct Node *t) {
  assert(t);
  if (t->token_type == kTokenStringLiteral ||
      t->token_type == kTokenCharLiteral) {
    fprintf(stderr, "%.*s", t->length, t->begin);
    return;
  }
  fprintf(stderr, "<%.*s>", t->length, t->begin);
}

// Writes the text of one token to fp.
void PrintTokenStrToFile(struct Node *t, FILE *fp) {
  fprintf(fp, "%.*s", t->length, t->begin);
}

// Returns true for tokens that RemoveDelimiterTokens() drops.
static bool ShouldRemoveToken(struct Node *t) {
  return t->token_type == kTokenDelimiter ||
         t->token_type == kTokenZeroWidthNoBreakSpace;
}

// Token stream

// Cursor of the token stream: points at the link (list head or a next_token
// field) that holds the current token, so tokens can be removed or inserted
// at the cursor without a back pointer.
static struct Node **next_token_holder;

// Places the cursor on the first token of the list held by head_token_holder.
void InitTokenStream(struct Node **head_token_holder) {
  assert(head_token_holder);
  next_token_holder = head_token_holder;
}

// Moves the cursor to the next token; does nothing at the end of the stream.
static void AdvanceTokenStream(void) {
  if (!*next_token_holder) return;
  next_token_holder = &(*next_token_holder)->next_token;
}

// Returns the current token without consuming it (NULL at the end).
struct Node *PeekToken(void) {
  assert(next_token_holder);
  return *next_token_holder;
}

// Returns the current token if it has the given type, without consuming it;
// returns NULL otherwise.
struct Node *ReadToken(enum TokenType type) {
  struct Node *t = *next_token_holder;
  if (!t || !IsTokenWithType(t, type)) return NULL;
  return t;
}

// Consumes and returns the current token if it has the given type;
// returns NULL and leaves the cursor unchanged otherwise.
struct Node *ConsumeToken(enum TokenType type) {
  struct Node *t = *next_token_holder;
  if (!t || !IsTokenWithType(t, type)) return NULL;
  AdvanceTokenStream();
  return t;
}

// Consumes and returns the current token if its text is exactly s;
// returns NULL and leaves the cursor unchanged otherwise.
struct Node *ConsumeTokenStr(const char *s) {
  struct Node *t = *next_token_holder;
  if (!t || !IsEqualTokenWithCStr(t, s)) return NULL;
  AdvanceTokenStream();
  return t;
}

// Like ConsumeTokenStr() but reports an error when the token does not match.
struct Node *ExpectTokenStr(const char *s) {
  struct Node *t = *next_token_holder;
  if (!t) Error("Expect token %s but got EOF", s);
  if (!ConsumeTokenStr(s)) ErrorWithToken(t, "Expected token %s here", s);
  return t;
}

// Consumes and returns the current token if it is the punctuator s;
// returns NULL and leaves the cursor unchanged otherwise.
struct Node *ConsumePunctuator(const char *s) {
  struct Node *t = *next_token_holder;
  if (!t || !IsTokenWithType(t, kTokenPunctuator) ||
      !IsEqualTokenWithCStr(t, s))
    return NULL;
  AdvanceTokenStream();
  return t;
}

// Like ConsumePunctuator() but reports an error when the token does not
// match.
struct Node *ExpectPunctuator(const char *s) {
  struct Node *t = *next_token_holder;
  if (!t) Error("Expect token %s but got EOF", s);
  if (!ConsumePunctuator(s)) ErrorWithToken(t, "Expected token %s here", s);
  return t;
}

// Consumes and returns the current token (NULL at the end of the stream).
struct Node *NextToken(void) {
  struct Node *t = *next_token_holder;
  AdvanceTokenStream();
  return t;
}

// Unlinks the current token from the list; the following token becomes the
// current one. The removed token keeps its own next_token link.
void RemoveCurrentToken(void) {
  if (!*next_token_holder) return;
  *next_token_holder = (*next_token_holder)->next_token;
}

// Removes tokens from the cursor until end becomes the current token.
// Removes everything up to the end of the stream when end is NULL or is not
// found.
void RemoveTokensTo(struct Node *end) {
  while (*next_token_holder && *next_token_holder != end) {
    RemoveCurrentToken();
  }
}

void InsertTokens(struct Node *seq_first) {
  // Insert token sequence (seq) at current cursor pos.
  // The first inserted token becomes the current token.
  if (!IsToken(seq_first)) return;
  struct Node *seq_last = seq_first;
  while (seq_last->next_token) seq_last = seq_last->next_token;
  seq_last->next_token = PeekToken();
  *next_token_holder = seq_first;
}

// Builds the string literal token produced by the # operator from the token
// list head: the spelling of all tokens concatenated and wrapped in double
// quotes. '"' and '\' inside string and character literal tokens are
// escaped with a backslash so the result is a valid string literal.
// An empty list (NULL) yields the empty string literal "".
static struct Node *CreateStringLiteralOfTokens(struct Node *head) {
  assert(!head || IsToken(head));
  // Worst case: every character needs an escaping backslash.
  int max_len = 0;
  for (struct Node *t = head; t; t = t->next_token) {
    max_len += t->length * 2;
  }
  char *s = malloc(max_len + 1 + 2);
  assert(s);
  int n = 0;
  s[n] = '"';
  n++;
  for (struct Node *t = head; t; t = t->next_token) {
    bool is_literal = t->token_type == kTokenStringLiteral ||
                      t->token_type == kTokenCharLiteral;
    for (int i = 0; i < t->length; i++) {
      char c = t->begin[i];
      if (is_literal && (c == '"' || c == '\\')) {
        s[n] = '\\';
        n++;
      }
      s[n] = c;
      n++;
    }
  }
  s[n] = '"';
  n++;
  s[n] = 0;
  return AllocToken(s, 0, s, n, kTokenStringLiteral);
}

void InsertTokensWithIdentReplace(struct Node *seq, struct Node *rep_list) {
  // Insert token sequence (seq) at current cursor pos.
  // if seq contains token in rep_list, replace it with tokens rep_list[token];
  // elements of seq will be inserted directly.
  // "# name" with name in rep_list is replaced with a string literal of
  // rep_list[name].
  if (!IsToken(seq)) return;
  // next_holder is the link where the next piece gets spliced in; the token
  // it currently holds always ends up after everything inserted so far.
  struct Node **next_holder = next_token_holder;
  while (seq) {
    struct Node *e;
    if (IsEqualTokenWithCStr(seq, "#") && seq->next_token &&
        (e = GetNodeByTokenKey(rep_list, seq->next_token))) {
      // stringify: consumes both '#' and the parameter name
      struct Node *st = CreateStringLiteralOfTokens(e->value);
      seq = seq->next_token->next_token;
      //
      st->next_token = *next_holder;
      *next_holder = st;
      next_holder = &st->next_token;
      continue;
    }
    if (!(e = GetNodeByTokenKey(rep_list, seq))) {
      // no replace
      struct Node *n = seq;
      seq = seq->next_token;
      //
      n->next_token = *next_holder;
      *next_holder = n;
      next_holder = &(*next_holder)->next_token;
      continue;
    }
    struct Node *n = DuplicateTokenSequence(e->value);
    seq = seq->next_token;
    if (!n) {
      // The argument is empty: the parameter expands to nothing.
      continue;
    }
    struct Node *n_last = n;
    while (n_last->next_token) n_last = n_last->next_token;
    //
    n_last->next_token = *next_holder;
    *next_holder = n;
    next_holder = &n_last->next_token;
  }
}

// Removes every delimiter and zero width no-break space token from the list
// held by head_holder and returns head_holder. The token stream cursor is
// left at the end of the list.
struct Node **RemoveDelimiterTokens(struct Node **head_holder) {
  InitTokenStream(head_holder);
  for (;;) {
    struct Node *t = PeekToken();
    if (!t) break;
    if (ShouldRemoveToken(t)) {
      RemoveCurrentToken();
      continue;
    }
    AdvanceTokenStream();
  }
  return head_holder;
}

-------------------------------------------------------------------------------- /tokenizer.c: -------------------------------------------------------------------------------- 1 | #include "compilium.h" 2 | 3 | static struct Node *CreateNextToken(const char *p, const char *src, int *line) { 4 | assert(line); 5 | if (!*p) return NULL; 6 | if (*p == ' ') { 7 | return AllocToken(src, *line, p, 1, kTokenDelimiter); 8 | } 9 | if (*p == '\n') { 10 | (*line)++; 11 | return AllocToken(src, *line, p, 1, kTokenDelimiter); 12 | } 13 | if (p[0] == '\\' && p[1] == '\n') { 14 | (*line)++;
15 | return AllocToken(src, *line, p, 2, kTokenZeroWidthNoBreakSpace); 16 | } 17 | if ('1' <= *p && *p <= '9') { 18 | int length = 0; 19 | while ('0' <= p[length] && p[length] <= '9') { 20 | length++; 21 | } 22 | return AllocToken(src, *line, p, length, kTokenIntegerConstant); 23 | } else if ('0' == *p) { 24 | int length = 0; 25 | if (p[1] == 'x') { 26 | // Hexadecimal 27 | length += 2; 28 | while (('0' <= p[length] && p[length] <= '9') || 29 | ('A' <= p[length] && p[length] <= 'F') || 30 | ('a' <= p[length] && p[length] <= 'f')) { 31 | length++; 32 | } 33 | } else { 34 | // Octal 35 | while ('0' <= p[length] && p[length] <= '7') { 36 | length++; 37 | } 38 | } 39 | return AllocToken(src, *line, p, length, kTokenIntegerConstant); 40 | } else if (('A' <= *p && *p <= 'Z') || ('a' <= *p && *p <= 'z') || 41 | *p == '_') { 42 | int length = 0; 43 | while (('A' <= p[length] && p[length] <= 'Z') || 44 | ('a' <= p[length] && p[length] <= 'z') || p[length] == '_' || 45 | ('0' <= p[length] && p[length] <= '9')) { 46 | length++; 47 | } 48 | struct Node *t = AllocToken(src, *line, p, length, kTokenIdent); 49 | if (IsEqualTokenWithCStr(t, "break")) t->token_type = kTokenKwBreak; 50 | if (IsEqualTokenWithCStr(t, "char")) t->token_type = kTokenKwChar; 51 | if (IsEqualTokenWithCStr(t, "const")) t->token_type = kTokenKwConst; 52 | if (IsEqualTokenWithCStr(t, "continue")) t->token_type = kTokenKwContinue; 53 | if (IsEqualTokenWithCStr(t, "else")) t->token_type = kTokenKwElse; 54 | if (IsEqualTokenWithCStr(t, "extern")) t->token_type = kTokenKwExtern; 55 | if (IsEqualTokenWithCStr(t, "for")) t->token_type = kTokenKwFor; 56 | if (IsEqualTokenWithCStr(t, "if")) t->token_type = kTokenKwIf; 57 | if (IsEqualTokenWithCStr(t, "int")) t->token_type = kTokenKwInt; 58 | if (IsEqualTokenWithCStr(t, "long")) t->token_type = kTokenKwLong; 59 | if (IsEqualTokenWithCStr(t, "return")) t->token_type = kTokenKwReturn; 60 | if (IsEqualTokenWithCStr(t, "sizeof")) t->token_type = kTokenKwSizeof; 61 | if 
(IsEqualTokenWithCStr(t, "static")) t->token_type = kTokenKwStatic; 62 | if (IsEqualTokenWithCStr(t, "struct")) t->token_type = kTokenKwStruct; 63 | if (IsEqualTokenWithCStr(t, "typedef")) t->token_type = kTokenKwTypedef; 64 | if (IsEqualTokenWithCStr(t, "unsigned")) t->token_type = kTokenKwUnsigned; 65 | if (IsEqualTokenWithCStr(t, "void")) t->token_type = kTokenKwVoid; 66 | if (IsEqualTokenWithCStr(t, "while")) t->token_type = kTokenKwWhile; 67 | return t; 68 | } else if ('\'' == *p) { 69 | int length = 1; 70 | while (p[length] && p[length] != '\'') { 71 | if (p[length] == '\\' && p[length + 1]) { 72 | length++; 73 | } 74 | length++; 75 | } 76 | if (p[length] != '\'') { 77 | Error("Expected end of char literal (')"); 78 | } 79 | length++; 80 | return AllocToken(src, *line, p, length, kTokenCharLiteral); 81 | } else if ('"' == *p) { 82 | int length = 1; 83 | while (p[length] && p[length] != '"') { 84 | if (p[length] == '\\' && p[length + 1]) { 85 | length++; 86 | } 87 | length++; 88 | } 89 | if (p[length] != '"') { 90 | Error("Expected end of string literal (\")"); 91 | } 92 | length++; 93 | return AllocToken(src, *line, p, length, kTokenStringLiteral); 94 | } else if ('#' == *p) { 95 | if (p[1] == '#') { 96 | return AllocToken(src, *line, p, 2, kTokenPunctuator); 97 | } 98 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 99 | } else if ('&' == *p) { 100 | if (p[1] == '&') { 101 | return AllocToken(src, *line, p, 2, kTokenPunctuator); 102 | } 103 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 104 | } else if ('|' == *p) { 105 | if (p[1] == '|') { 106 | return AllocToken(src, *line, p, 2, kTokenPunctuator); 107 | } 108 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 109 | } else if ('<' == *p) { 110 | if (p[1] == '<') { 111 | if (p[2] == '=') { 112 | return AllocToken(src, *line, p, 3, kTokenPunctuator); 113 | } 114 | return AllocToken(src, *line, p, 2, kTokenPunctuator); 115 | } else if (p[1] == '=') { 116 | return AllocToken(src, *line, 
p, 2, kTokenPunctuator); 117 | } 118 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 119 | } else if ('>' == *p) { 120 | if (p[1] == '>') { 121 | if (p[2] == '=') { 122 | return AllocToken(src, *line, p, 3, kTokenPunctuator); 123 | } 124 | return AllocToken(src, *line, p, 2, kTokenPunctuator); 125 | } else if (p[1] == '=') { 126 | return AllocToken(src, *line, p, 2, kTokenPunctuator); 127 | } 128 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 129 | } else if ('=' == *p) { 130 | if (p[1] == '=') { 131 | return AllocToken(src, *line, p, 2, kTokenPunctuator); 132 | } 133 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 134 | } else if ('!' == *p) { 135 | if (p[1] == '=') { 136 | return AllocToken(src, *line, p, 2, kTokenPunctuator); 137 | } 138 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 139 | } else if ('^' == *p) { 140 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 141 | } else if ('+' == *p) { 142 | if (p[1] == '+') { 143 | return AllocToken(src, *line, p, 2, kTokenPunctuator); 144 | } 145 | if (p[1] == '=') { 146 | return AllocToken(src, *line, p, 2, kTokenPunctuator); 147 | } 148 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 149 | } else if ('-' == *p) { 150 | if (p[1] == '-' || p[1] == '=' || p[1] == '>') { 151 | return AllocToken(src, *line, p, 2, kTokenPunctuator); 152 | } 153 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 154 | } else if ('*' == *p) { 155 | if (p[1] == '/') { 156 | return AllocToken(src, *line, p, 2, kTokenBlockCommentEnd); 157 | } 158 | if (p[1] == '=') { 159 | return AllocToken(src, *line, p, 2, kTokenPunctuator); 160 | } 161 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 162 | } else if ('/' == *p) { 163 | if (p[1] == '/') { 164 | return AllocToken(src, *line, p, 2, kTokenLineComment); 165 | } 166 | if (p[1] == '*') { 167 | return AllocToken(src, *line, p, 2, kTokenBlockCommentBegin); 168 | } 169 | if (p[1] == '=') { 170 | return AllocToken(src, *line, p, 2, 
kTokenPunctuator); 171 | } 172 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 173 | } else if ('%' == *p) { 174 | if (p[1] == '=') { 175 | return AllocToken(src, *line, p, 2, kTokenPunctuator); 176 | } 177 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 178 | } else if ('~' == *p) { 179 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 180 | } else if ('?' == *p) { 181 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 182 | } else if (':' == *p) { 183 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 184 | } else if (',' == *p) { 185 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 186 | } else if (';' == *p) { 187 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 188 | } else if ('{' == *p) { 189 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 190 | } else if ('}' == *p) { 191 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 192 | } else if ('(' == *p) { 193 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 194 | } else if (')' == *p) { 195 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 196 | } else if ('.' == *p) { 197 | if (p[1] == '.' && p[2] == '.') { 198 | return AllocToken(src, *line, p, 3, kTokenPunctuator); 199 | } 200 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 201 | } else if ('[' == *p) { 202 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 203 | } else if (']' == *p) { 204 | return AllocToken(src, *line, p, 1, kTokenPunctuator); 205 | } 206 | return AllocToken(src, *line, p, 1, kTokenUnknownChar); 207 | } 208 | 209 | struct Node *CreateToken(const char *input) { 210 | int line = 1; 211 | return CreateNextToken(input, input, &line); 212 | } 213 | 214 | struct Node *Tokenize(const char *input) { 215 | // returns head of tokens. 
216 | struct Node *token_head = NULL; 217 | struct Node **last_next_token = &token_head; 218 | const char *p = input; 219 | struct Node *t; 220 | int line = 1; 221 | while ((t = CreateNextToken(p, input, &line))) { 222 | *last_next_token = t; 223 | last_next_token = &t->next_token; 224 | p = t->begin + t->length; 225 | } 226 | return token_head; 227 | } 228 | -------------------------------------------------------------------------------- /type.c: -------------------------------------------------------------------------------- 1 | #include "compilium.h" 2 | 3 | int IsSameTypeExceptAttr(struct Node *a, struct Node *b) { 4 | assert(a && b); 5 | a = GetTypeWithoutAttr(a); 6 | b = GetTypeWithoutAttr(b); 7 | if (a->type != b->type) return 0; 8 | if (a->type == kTypeBase) { 9 | assert(a->op && b->op); 10 | return a->op->type == b->op->type; 11 | } else if (a->type == kTypePointer) { 12 | return IsSameTypeExceptAttr(a->right, b->right); 13 | } else if (a->type == kTypeFunction) { 14 | if (!IsSameTypeExceptAttr(a->left, b->left)) return 0; 15 | if (GetSizeOfList(a->right) != GetSizeOfList(b->right)) return 0; 16 | for (int i = 0; i < GetSizeOfList(a->right); i++) { 17 | if (!IsSameTypeExceptAttr(GetNodeAt(a->right, i), GetNodeAt(b->right, i))) 18 | return 0; 19 | } 20 | return 1; 21 | } 22 | Error("IsSameTypeExceptAttr: Comparing non-type nodes"); 23 | } 24 | 25 | struct Node *GetTypeWithoutAttr(struct Node *t) { 26 | if (!t) return NULL; 27 | if (t->type != kTypeLValue && t->type != kTypeAttrIdent) return t; 28 | return GetTypeWithoutAttr(t->right); 29 | } 30 | 31 | struct Node *GetIdentifierTokenFromTypeAttr(struct Node *t) { 32 | if (!t || t->type != kTypeAttrIdent) return NULL; 33 | return t->left; 34 | } 35 | 36 | struct Node *GetRValueType(struct Node *t) { 37 | if (!t) return NULL; 38 | if (t->type != kTypeLValue) return t; 39 | return t->right; 40 | } 41 | 42 | int IsLValueType(struct Node *t) { return t && t->type == kTypeLValue; } 43 | 44 | int 
IsAssignable(struct Node *dst, struct Node *src) { 45 | assert(dst && src); 46 | if (dst->type != kTypeLValue) return 0; 47 | return IsSameTypeExceptAttr(GetRValueType(dst), src); 48 | } 49 | 50 | int EvalExprAsInt(struct Node *n) { 51 | assert(n); 52 | if (IsTokenWithType(n->op, kTokenIntegerConstant)) { 53 | return strtol(n->op->begin, NULL, 0); 54 | } 55 | if (n->type == kASTExpr && IsEqualTokenWithCStr(n->op, "+")) { 56 | return EvalExprAsInt(n->left) + EvalExprAsInt(n->right); 57 | } 58 | assert(false); 59 | } 60 | 61 | int GetSizeOfType(struct Node *t) { 62 | t = GetTypeWithoutAttr(t); 63 | assert(t); 64 | if (t->type == kTypeBase) { 65 | assert(IsToken(t->op)); 66 | switch (t->op->token_type) { 67 | case kTokenKwInt: 68 | case kTokenKwLong: 69 | return 4; 70 | case kTokenKwChar: 71 | return 1; 72 | case kTokenKwVoid: 73 | return 0; 74 | default: 75 | PrintASTNode(t->op); 76 | assert(false); 77 | } 78 | } else if (t->type == kTypePointer) { 79 | return 8; 80 | } else if (t->type == kTypeStruct) { 81 | if (!t->type_struct_spec) { 82 | ErrorWithToken(t->tag, "Cannot take sizeof incomplete struct"); 83 | } 84 | return CalcStructSize(t->type_struct_spec); 85 | } else if (t->type == kTypeArray) { 86 | return GetSizeOfType(t->type_array_type_of) * 87 | EvalExprAsInt(t->type_array_index_decl); 88 | } 89 | PrintASTNode(t); 90 | assert(false); 91 | } 92 | 93 | int GetAlignOfType(struct Node *t) { 94 | t = GetTypeWithoutAttr(t); 95 | assert(t); 96 | if (t->type == kTypeBase) { 97 | assert(IsToken(t->op)); 98 | switch (t->op->token_type) { 99 | case kTokenKwInt: 100 | return 4; 101 | case kTokenKwChar: 102 | return 1; 103 | default: 104 | assert(false); 105 | } 106 | } else if (t->type == kTypePointer) { 107 | return 8; 108 | } else if (t->type == kTypeStruct) { 109 | return CalcStructAlign(t->type_struct_spec); 110 | } 111 | PrintASTNode(t); 112 | assert(false); 113 | } 114 | 115 | struct Node *CreateTypeFromDecl(struct Node *decl); 116 | struct Node *CreateType(struct 
Node *decl_spec, struct Node *decltor); 117 | struct Node *CreateTypeFromDecltor(struct Node *decltor, struct Node *type) { 118 | assert(decltor && decltor->type == kASTDecltor); 119 | struct Node *pointer = decltor->left; 120 | if (pointer) { 121 | struct Node *p = pointer; 122 | while (p->right) { 123 | p = p->right; 124 | } 125 | p->right = type; 126 | type = pointer; 127 | } 128 | for (struct Node *dd = decltor->right; dd; dd = dd->left) { 129 | assert(dd->type == kASTDirectDecltor); 130 | if (dd->left) { 131 | assert(dd->op); 132 | if (IsEqualTokenWithCStr(dd->op, "(")) { 133 | // direct-declarator ( parameter-type-list | identifier-list_opt ) 134 | struct Node *arg_type_list = AllocList(); 135 | for (int i = 0; i < GetSizeOfList(dd->right); i++) { 136 | struct Node *arg = GetNodeAt(dd->right, i); 137 | if (IsEqualTokenWithCStr(arg, "...")) { 138 | if (i != GetSizeOfList(dd->right) - 1) { 139 | ErrorWithToken(arg, 140 | "va arg is only allowed at the end of params."); 141 | } 142 | PushToList(arg_type_list, arg); 143 | break; 144 | } 145 | PushToList(arg_type_list, CreateTypeFromDecl(arg)); 146 | } 147 | type = CreateTypeFunction(type, arg_type_list); 148 | continue; 149 | } 150 | if (IsEqualTokenWithCStr(dd->op, "[")) { 151 | // direct-declarator [ list ] 152 | type = CreateTypeArray(type, dd->right); 153 | continue; 154 | } 155 | assert(false); 156 | } 157 | assert(!dd->left); 158 | if (IsEqualTokenWithCStr(dd->op, "(")) { 159 | assert(dd->value && dd->value->type == kASTDecltor); 160 | type = CreateTypeFromDecltor(dd->value, type); 161 | continue; 162 | } 163 | assert(IsTokenWithType(dd->op, kTokenIdent)); 164 | type = CreateTypeAttrIdent(dd->op, type); 165 | } 166 | return type; 167 | } 168 | 169 | static struct Node *CreateBaseTypeFromDeclSpecs(struct SymbolEntry *ctx, 170 | struct Node *decl_specs) { 171 | // 6.2.5 Types 172 | assert(IsASTList(decl_specs)); 173 | struct Node *type_qual = NULL; 174 | struct Node *type_spec = NULL; 175 | for (int i = 0; i 
< GetSizeOfList(decl_specs); i++) { 176 | struct Node *t = GetNodeAt(decl_specs, i); 177 | if (IsTokenWithType(t, kTokenKwTypedef) || 178 | IsTokenWithType(t, kTokenKwUnsigned) || 179 | IsTokenWithType(t, kTokenKwExtern)) { 180 | continue; 181 | } 182 | if (IsTokenWithType(t, kTokenKwConst)) { 183 | assert(!type_qual); 184 | type_qual = t; 185 | continue; 186 | } 187 | assert(!type_spec); 188 | type_spec = t; 189 | } 190 | assert(type_spec); 191 | if (IsToken(type_spec)) { 192 | if (!IsTokenWithType(type_spec, kTokenKwInt) && 193 | !IsTokenWithType(type_spec, kTokenKwChar) && 194 | !IsTokenWithType(type_spec, kTokenKwLong) && 195 | !IsEqualTokenWithCStr(type_spec, "__builtin_va_list") && 196 | !IsTokenWithType(type_spec, kTokenKwVoid)) { 197 | ErrorWithToken(type_spec, "Unexpected token for base type specifier"); 198 | } 199 | return CreateTypeBase(type_spec); 200 | } 201 | if (type_spec->type == kASTStructSpec) { 202 | if (!type_spec->struct_member_dict) { 203 | assert(type_spec->tag); 204 | struct Node *resolved_type = FindStructType(ctx, type_spec->tag); 205 | if (resolved_type) return resolved_type; 206 | return CreateTypeStruct(type_spec->tag, NULL); 207 | } 208 | return CreateTypeStruct(type_spec->tag, type_spec); 209 | } 210 | if (type_spec->type == kTypeStruct || type_spec->type == kTypeBase) { 211 | // typedef_type 212 | return type_spec; 213 | } 214 | PrintASTNode(decl_specs); 215 | assert(false); 216 | } 217 | 218 | struct Node *CreateTypeInContext(struct SymbolEntry *ctx, 219 | struct Node *decl_specs, 220 | struct Node *decltor) { 221 | struct Node *type = CreateBaseTypeFromDeclSpecs(ctx, decl_specs); 222 | if (!decltor) return type; 223 | return CreateTypeFromDecltor(decltor, type); 224 | } 225 | 226 | struct Node *CreateType(struct Node *decl_spec, struct Node *decltor) { 227 | return CreateTypeInContext(NULL, decl_spec, decltor); 228 | } 229 | 230 | struct Node *CreateTypeFromDecl(struct Node *decl) { 231 | assert(decl && decl->type == kASTDecl); 
232 | return CreateType(decl->op, decl->right); 233 | } 234 | 235 | struct Node *CreateTypeFromDeclInContext(struct SymbolEntry *ctx, 236 | struct Node *decl) { 237 | assert(decl && decl->type == kASTDecl); 238 | return CreateTypeInContext(ctx, decl->op, decl->right); 239 | } 240 | 241 | struct Node *Tokenize(const char *input); 242 | struct Node *ParseDecl(void); 243 | static struct Node *CreateTypeFromInput(const char *s) { 244 | fprintf(stderr, "CreateTypeFromInput: %s\n", s); 245 | struct Node *tokens = Tokenize(s); 246 | InitParser(&tokens); 247 | return CreateTypeFromDecl(ParseDecl()); 248 | } 249 | _Noreturn void TestType() { 250 | fprintf(stderr, "Testing Type...\n"); 251 | 252 | struct Node *int_type = CreateTypeBase(CreateToken("int")); 253 | struct Node *another_int_type = CreateTypeBase(CreateToken("int")); 254 | struct Node *lvalue_int_type = CreateTypeLValue(int_type); 255 | struct Node *pointer_of_int_type = CreateTypePointer(int_type); 256 | struct Node *another_pointer_of_int_type = 257 | CreateTypePointer(another_int_type); 258 | 259 | assert(IsSameTypeExceptAttr(int_type, int_type)); 260 | assert(IsSameTypeExceptAttr(int_type, another_int_type)); 261 | assert(IsSameTypeExceptAttr(int_type, lvalue_int_type)); 262 | assert(IsSameTypeExceptAttr(lvalue_int_type, lvalue_int_type)); 263 | assert(!IsSameTypeExceptAttr(int_type, pointer_of_int_type)); 264 | assert( 265 | IsSameTypeExceptAttr(pointer_of_int_type, another_pointer_of_int_type)); 266 | assert(GetSizeOfType(int_type) == 4); 267 | assert(GetSizeOfType(pointer_of_int_type) == 8); 268 | 269 | struct Node *char_type = CreateTypeBase(CreateToken("char")); 270 | assert(GetSizeOfType(char_type) == 1); 271 | 272 | struct Node *long_type = CreateTypeBase(CreateToken("long")); 273 | assert(GetSizeOfType(long_type) == 4); 274 | 275 | struct Node *ppi_type = CreateTypePointer(pointer_of_int_type); 276 | 277 | struct Node *args_i = AllocList(); 278 | PushToList(args_i, int_type); 279 | struct Node 
*if_i_type = CreateTypeFunction(int_type, args_i); 280 | struct Node *ppif_i_type = CreateTypeFunction(ppi_type, args_i); 281 | 282 | struct Node *args_pi = AllocList(); 283 | PushToList(args_pi, pointer_of_int_type); 284 | struct Node *if_pi_type = CreateTypeFunction(int_type, args_pi); 285 | struct Node *ppif_pi_type = CreateTypeFunction(ppi_type, args_pi); 286 | 287 | struct Node *type; 288 | 289 | type = CreateTypeFromInput("void* (*f)(int size);"); 290 | PrintASTNode(type); 291 | type = GetTypeWithoutAttr(type); 292 | assert(type->type == kTypePointer); 293 | type = type->right; 294 | assert(type->type == kTypeFunction); 295 | type = GetReturnTypeOfFunction(type); 296 | assert(type->type == kTypePointer); 297 | assert(GetSizeOfType(type) == 8); 298 | 299 | type = CreateTypeFromInput("int v;"); 300 | PrintASTNode(type); 301 | assert(IsSameTypeExceptAttr(type, int_type)); 302 | 303 | type = CreateTypeFromInput("int *p;"); 304 | PrintASTNode(type); 305 | assert(IsSameTypeExceptAttr(type, pointer_of_int_type)); 306 | 307 | type = CreateTypeFromInput("int **p;"); 308 | PrintASTNode(type); 309 | assert(IsSameTypeExceptAttr(type, ppi_type)); 310 | 311 | type = CreateTypeFromInput("int f(int a);"); 312 | PrintASTNode(type); 313 | assert(IsSameTypeExceptAttr(type, if_i_type)); 314 | assert(!IsSameTypeExceptAttr(type, if_pi_type)); 315 | 316 | type = CreateTypeFromInput("int f(int *a);"); 317 | PrintASTNode(type); 318 | assert(!IsSameTypeExceptAttr(type, if_i_type)); 319 | assert(IsSameTypeExceptAttr(type, if_pi_type)); 320 | 321 | type = CreateTypeFromInput("int **f(int a);"); 322 | PrintASTNode(type); 323 | assert(IsSameTypeExceptAttr(type, ppif_i_type)); 324 | assert(!IsSameTypeExceptAttr(type, ppif_pi_type)); 325 | PrintASTNode(if_i_type); 326 | assert(!IsSameTypeExceptAttr(type, if_i_type)); 327 | assert(!IsSameTypeExceptAttr(type, if_pi_type)); 328 | 329 | type = 330 | CreateTypeFromInput("void (*signal(int sig, void (*func)(int)))(int);"); 331 | assert(type && 
GetTypeWithoutAttr(type) && 332 | GetTypeWithoutAttr(type)->type == kTypeFunction); 333 | PrintASTNode(type); 334 | 335 | type = CreateTypeFromInput("struct IncompleteStruct;"); 336 | assert(type); 337 | PrintASTNode(type); 338 | 339 | fprintf(stderr, "PASS\n"); 340 | exit(EXIT_SUCCESS); 341 | } 342 | --------------------------------------------------------------------------------