├── disassemble_fragment ├── example_programs ├── test │ ├── expression_test.uc │ ├── bitwise.uc │ ├── flow.uc │ ├── function_pointers.uc │ ├── predecleration.uc │ └── arguments.uc ├── hello_count.uc ├── recursive_fib.uc ├── fib.uc ├── simple_b.uf ├── example.bf ├── calculator.uc └── brainf.uc ├── README.md └── main.c /disassemble_fragment: -------------------------------------------------------------------------------- 1 | objdump -D -Mintel,x86-64 -b binary -m i386 $1 -------------------------------------------------------------------------------- /example_programs/test/expression_test.uc: -------------------------------------------------------------------------------- 1 | main () { 2 | printf("%d\n", 1000 / 2 * 23); 3 | } -------------------------------------------------------------------------------- /example_programs/test/bitwise.uc: -------------------------------------------------------------------------------- 1 | test() { 2 | return 8 << 1; 3 | } 4 | 5 | 6 | main () { 7 | printf("a:%d\n", test()); 8 | } -------------------------------------------------------------------------------- /example_programs/hello_count.uc: -------------------------------------------------------------------------------- 1 | main () { 2 | a = 0; 3 | printf("Number:"); 4 | scanf("%ul", &a); 5 | 6 | while (a > 0) { 7 | puts("Hello"); 8 | a = a - 1; 9 | } 10 | } -------------------------------------------------------------------------------- /example_programs/test/flow.uc: -------------------------------------------------------------------------------- 1 | 2 | main () { 3 | i = 0; 4 | while (i < 10) { 5 | i = i + 1; 6 | if (i == 5) 7 | break; 8 | printf("i:%d\n", i); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /example_programs/test/function_pointers.uc: -------------------------------------------------------------------------------- 1 | test (a) { 2 | a("This is my message"); 3 | } 4 | 5 | custom_log(message) { 6 | printf("MSG: 
%s\n", message); 7 | } 8 | 9 | main () { 10 | test(puts); 11 | test(custom_log); 12 | } -------------------------------------------------------------------------------- /example_programs/test/predecleration.uc: -------------------------------------------------------------------------------- 1 | /* 2 | This is a very simple example that demonstrates redeclaring functions. 3 | 4 | */ 5 | 6 | 7 | test() {} 8 | 9 | main() { 10 | test(); 11 | } 12 | 13 | test() { 14 | puts("Hello world"); 15 | } -------------------------------------------------------------------------------- /example_programs/recursive_fib.uc: -------------------------------------------------------------------------------- 1 | fib(n) { 2 | if (n <= 1) 3 | return 1; 4 | 5 | return fib(n - 1) + fib(n - 2); 6 | } 7 | 8 | main (argc, argv) { 9 | i = 0; 10 | while (i < 20) { 11 | printf("fib %d: %d\n", i, fib(i)); 12 | i = i + 1; 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /example_programs/test/arguments.uc: -------------------------------------------------------------------------------- 1 | /* 2 | Program that prints arguments. 3 | */ 4 | 5 | main (argc, args) { 6 | printf("arg count: %d\n", argc); 7 | i = 0; 8 | while (i < argc) { 9 | printf("args[%d]: %s\n", i, *(args + i * 8)); 10 | i = i + 1; 11 | } 12 | } -------------------------------------------------------------------------------- /example_programs/fib.uc: -------------------------------------------------------------------------------- 1 | /* 2 | Program that prints out fibonacci numbers. 3 | */ 4 | 5 | main () { 6 | a = 1; 7 | b = 0; 8 | i = 0; 9 | 10 | while (i < 20) { 11 | printf("%d:%d\n", i, a); // yes we can call libc functions ! 
12 | oldb = b; 13 | b = a; 14 | a = a + oldb; 15 | 16 | i = i + 1; 17 | } 18 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This is the compiler I wrote as a part of my tutorial on writing a very simple compiler. 2 | 3 | Currently only supports the X86_64 architecture and the Linux kernel. 4 | 5 | To compile, just `gcc main.c -o simple_compiler`. 6 | 7 | Example usage `./simple_compiler example_programs/fib.uc` 8 | 9 | You can pass arguments to the main method like this: 10 | 11 | `./simple_compiler example_programs/calculator.uc "10 * 23"` 12 | -------------------------------------------------------------------------------- /example_programs/simple_b.uf: -------------------------------------------------------------------------------- 1 | [life.b -- John Horton Conway's Game of Life 2 | (c) 2021 Daniel B. Cristofani 3 | http://brainfuck.org/] 4 | 5 | >>>->+>+++++>(++++++++++)[[>>>+<<<-]>+++++>+>>+[<<+>>>>>+<<<-]<-]>>>>[ 6 | [>>>+>+<<<<-]+++>>+[<+>>>+>+<<<-]>>[>[[>>>+<<<-]<]<<++>+>>>>>>-]<- 7 | ]+++>+>[[-]<+<[>+++++++++++++++++<-]<+]>>[ 8 | [+++++++++.-------->>>]+[-<<<]>>>[>>,----------[>]<]<<[ 9 | <<<[ 10 | >--[<->>+>-<<-]<[[>>>]+>-[+>>+>-]+[<<<]<-]>++>[<+>-] 11 | >[[>>>]+[<<<]>>>-]+[->>>]<-[++>]>[------<]>+++[<<<]> 12 | ]< 13 | ]>[ 14 | -[+>>+>-]+>>+>>>+>[<<<]>->+>[ 15 | >[->+>+++>>++[>>>]+++<<<++<<<++[>>>]>>>]<<<[>[>>>]+>>>] 16 | <<<<<<<[<<++<+[-<<<+]->++>>>++>>>++<<<<]<<<+[-<<<+]+>->>->> 17 | ]<<+<<+<<<+<<-[+<+<<-]+<+[ 18 | ->+>[-<-<<[<<<]>[>>[>>>]<<+<[<<<]>-]] 19 | <[<[<[<<<]>+>>[>>>]<<-]<[<<<]]>>>->>>[>>>]+> 20 | ]>+[-<<[-]<]-[ 21 | [>>>]<[<<[<<<]>>>>>+>[>>>]<-]>>>[>[>>>]<<<<+>[<<<]>>-]> 22 | ]<<<<<<[---<-----[-[-[<->>+++<+++++++[-]]]]<+<+]> 23 | ]>> 24 | ] 25 | 26 | [This program simulates the Game of Life cellular automaton. 
27 | 28 | It duplicates the interface of the classic program at 29 | http://www.linusakesson.net/programming/brainfuck/index.php, 30 | but this program was written from scratch. 31 | 32 | Type e.g. "be" to toggle the fifth cell in the second row, "q" to quit, 33 | or a bare linefeed to advance one generation. 34 | 35 | Grid wraps toroidally. Board size in parentheses in first line (2-166 work). 36 | 37 | This program is licensed under a Creative Commons Attribution-ShareAlike 4.0 38 | International License (http://creativecommons.org/licenses/by-sa/4.0/).] 39 | -------------------------------------------------------------------------------- /example_programs/example.bf: -------------------------------------------------------------------------------- 1 | ++++++++ Set Cell #0 to 8 2 | [ 3 | >++++ Add 4 to Cell #1; this will always set Cell #1 to 4 4 | [ as the cell will be cleared by the loop 5 | >++ Add 2 to Cell #2 6 | >+++ Add 3 to Cell #3 7 | >+++ Add 3 to Cell #4 8 | >+ Add 1 to Cell #5 9 | <<<<- Decrement the loop counter in Cell #1 10 | ] Loop until Cell #1 is zero; number of iterations is 4 11 | >+ Add 1 to Cell #2 12 | >+ Add 1 to Cell #3 13 | >- Subtract 1 from Cell #4 14 | >>+ Add 1 to Cell #6 15 | [<] Move back to the first zero cell you find; this will 16 | be Cell #1 which was cleared by the previous loop 17 | <- Decrement the loop Counter in Cell #0 18 | ] Loop until Cell #0 is zero; number of iterations is 8 19 | 20 | The result of this is: 21 | Cell no : 0 1 2 3 4 5 6 22 | Contents: 0 0 72 104 88 32 8 23 | Pointer : ^ 24 | 25 | >>. Cell #2 has value 72 which is 'H' 26 | >---. Subtract 3 from Cell #3 to get 101 which is 'e' 27 | +++++++..+++. Likewise for 'llo' from Cell #3 28 | >>. Cell #5 is 32 for the space 29 | <-. Subtract 1 from Cell #4 for 87 to give a 'W' 30 | <. Cell #3 was set to 'o' from the end of 'Hello' 31 | +++.------.--------. Cell #3 for 'rl' and 'd' 32 | >>+. Add 1 to Cell #5 gives us an exclamation point 33 | >++. 
And finally a newline from Cell #6 -------------------------------------------------------------------------------- /example_programs/calculator.uc: -------------------------------------------------------------------------------- 1 | /* 2 | * A simple calculator written in uC. 3 | */ 4 | 5 | char (ptr) { 6 | (*ptr & 255); 7 | } 8 | 9 | skipWhitespace(reader) { 10 | while(char(*reader) == *" " & char(*reader) != 0) { 11 | *reader = *reader + 1; 12 | } 13 | } 14 | 15 | parseInt(reader) { 16 | result = 0; 17 | sign = 1; 18 | if (**reader == *"-") { 19 | sign = -1; 20 | *reader = *reader + 1; 21 | } 22 | while (char(*reader) >= *"0" & char(*reader) <= *"9") { 23 | c = char(*reader); 24 | // I will overflow and feel no shame about it. 25 | result = result * 10; 26 | result = result + c - *"0"; 27 | *reader = *reader + 1; 28 | } 29 | 30 | return result * sign; 31 | } 32 | 33 | evalMul(reader) { 34 | left = parseInt(reader); 35 | while (1) { 36 | skipWhitespace(reader); 37 | size = 0; 38 | skipWhitespace(reader); 39 | op = char(*reader); 40 | 41 | if (op == *"*") { 42 | *reader = *reader + 1; 43 | skipWhitespace(reader); 44 | left = left * parseInt(reader); 45 | } else if(op == *"/") { 46 | *reader = *reader + 1; 47 | skipWhitespace(reader); 48 | left = left / parseInt(reader); 49 | } else { 50 | break; 51 | } 52 | } 53 | 54 | return left; 55 | } 56 | 57 | evalAdd(reader) { 58 | left = evalMul(reader); 59 | while (1) { 60 | skipWhitespace(reader); 61 | size = 0; 62 | skipWhitespace(reader); 63 | op = char(*reader); 64 | 65 | if (op == *"+") { 66 | *reader = *reader + 1; 67 | skipWhitespace(reader); 68 | left = left + evalMul(reader); 69 | } else if(op == *"-") { 70 | *reader = *reader + 1; 71 | skipWhitespace(reader); 72 | left = left - evalMul(reader); 73 | } else { 74 | break; 75 | } 76 | } 77 | 78 | return left; 79 | } 80 | 81 | 82 | main(argc, argv) { 83 | if(argc != 2) { 84 | puts("Usage calculator.sc '10 * 23'"); 85 | exit(1); 86 | } 87 | reader = *(argv + 8); 88 | 
printf("Result: %d\n", evalAdd(&reader)); 89 | } 90 | -------------------------------------------------------------------------------- /example_programs/brainf.uc: -------------------------------------------------------------------------------- 1 | /* 2 | * A simple BrainF interpreter written in uC. 3 | */ 4 | 5 | byte (ptr) { 6 | (*ptr & 255); 7 | } 8 | 9 | readFile(fileName) { 10 | file = fopen(fileName, "r"); 11 | if (file == 0) { 12 | puts("Couldn't open file !"); 13 | exit(1); 14 | } 15 | 16 | fseek(file, 0, 2); 17 | fileSize = ftell(file); 18 | fseek(file, 0, 0); 19 | buffer = calloc(fileSize + 16, 1); 20 | fread(buffer, 1, fileSize, file); 21 | 22 | (buffer); 23 | } 24 | 25 | main (argc, argv) { 26 | if (argc != 2) { 27 | puts("./Usage brainf.sc "); 28 | exit(1); 29 | } 30 | 31 | memory = calloc(3000000 * 8, 1); 32 | contents = readFile(*(argv + 8)); 33 | fileLength = strlen(contents); 34 | index = 3000000 / 2 * 8; 35 | codeIndex = 0; 36 | 37 | while (byte(contents + codeIndex) != 0) { 38 | c = byte(contents + codeIndex); 39 | //printf("Processing %d: %c, memory: %d\n ", codeIndex, c, index); 40 | if (c == *"+") { 41 | *(memory + index) = (*(memory + index) + 1) & 255; 42 | } else if (c == *"-") { 43 | *(memory + index) = (*(memory + index) - 1) & 255; 44 | } else if (c == *">") { 45 | index = index + 8; 46 | } else if (c == *"<") { 47 | index = index - 8; 48 | if (index < 0) { 49 | puts("Memory Index Negative !"); 50 | exit(1); 51 | } 52 | } else if (c == *".") { 53 | printf("%c", *(memory + index)); 54 | } else if (c == *",") { 55 | scanf("%c", memory + index); 56 | } else if (c == *"[" & *(memory + index) == 0 ) { 57 | balance = 0; 58 | continue = 1; 59 | while (continue & codeIndex != fileLength) { 60 | nc = byte(contents + codeIndex); 61 | if (nc == *"]") { 62 | balance = balance - 1; 63 | if (balance == 0) 64 | continue = 0; 65 | } else if (nc == *"[") { 66 | balance = balance + 1; 67 | } 68 | 69 | codeIndex = codeIndex + 1; 70 | } 71 | } else if (c == 
*"]" & *(memory + index) != 0) { 72 | balance = 0; 73 | continue = 1; 74 | while (continue & codeIndex != -1) { 75 | nc = byte(contents + codeIndex); 76 | if (nc == *"[") { 77 | balance = balance - 1; 78 | if (balance == 0) 79 | continue = 0; 80 | } else if (nc == *"]") { 81 | balance = balance + 1; 82 | } 83 | 84 | codeIndex = codeIndex - 1; 85 | } 86 | } 87 | 88 | codeIndex = codeIndex + 1; 89 | } 90 | 91 | } 92 | -------------------------------------------------------------------------------- /main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * This simple compiler is a syntax directed translator that directly emits x64 (IA-64, AMD64) machine code 4 | * while recursive descent parsing. 5 | * 6 | * by Kuter Dinel. 08/2023 7 | * 8 | */ 9 | 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include // used for dynamic linking. 21 | #include // for allocating executable memory. 22 | 23 | // Buffer managment code. 24 | #define BUFFER_SIZE 100000 25 | 26 | uint8_t result_buffer[BUFFER_SIZE]; 27 | size_t result_size = 0; 28 | 29 | // used for dynamic linking. 30 | void *dlHandle = NULL; 31 | 32 | #define critical_check_msg(cond, msg) if (!(cond)) { printf(__FILE__ ":%d\n" msg "\nCondition failed: " #cond, __LINE__); exit(1);} 33 | #define critical_check(cond) critical_check_msg(cond, "Compilation aborted !") 34 | 35 | // Push various sized values into the result buffer. 36 | void push_int8(uint8_t w) { 37 | critical_check_msg(result_size != BUFFER_SIZE, "Buffer Overflow ! Try increasing BUFFER_SIZE"); 38 | result_buffer[result_size++] = w; 39 | } 40 | 41 | void push_int16(uint16_t w) { 42 | critical_check_msg(result_size + 2 <= BUFFER_SIZE, "Buffer Overflow ! 
Try increasing BUFFER_SIZE"); 43 | result_buffer[result_size++] = (uint8_t)(w & 0xFF); 44 | result_buffer[result_size++] = (uint8_t)(w >> 8 & 0xFF); 45 | } 46 | 47 | void push_int32(uint32_t w) { 48 | critical_check_msg(result_size + 4 <= BUFFER_SIZE, "Buffer Overflow ! Try increasing BUFFER_SIZE"); 49 | 50 | result_buffer[result_size++] = (uint8_t)(w & 0xFF); 51 | result_buffer[result_size++] = (uint8_t)(w >> 8 & 0xFF); 52 | result_buffer[result_size++] = (uint8_t)(w >> 16 & 0xFF); 53 | result_buffer[result_size++] = (uint8_t)(w >> 24 & 0xFF); 54 | } 55 | 56 | void push_int64(uint64_t w) { 57 | push_int32(w & 0xFFFFFFFF); 58 | push_int32((w >> 32) & 0xFFFFFFFF); 59 | } 60 | 61 | typedef int32_t slot_t; 62 | 63 | int peak_stack_size = 0; 64 | 65 | // Allocate a stack slot. 66 | slot_t slot_alloc() { 67 | int32_t result = peak_stack_size; 68 | peak_stack_size += 8; 69 | 70 | return result; 71 | } 72 | 73 | // Positive integers are assumed to be unsigned and if the integer is spoused to be signed 74 | // it's users responsibility to ensure that the integer is small enough. 75 | // 76 | // Negative integers are treated as signed integers. 77 | // TODO: Hex parsing. 78 | uint64_t read_int(char **current) { 79 | uint64_t result = 0; 80 | bool isNegative = false; 81 | if (**current == '-') { 82 | isNegative = true; 83 | (*current)++; 84 | } 85 | 86 | while (**current >= '0' && **current <= '9') { 87 | result *= 10; 88 | uint64_t d = (**current - '0'); 89 | // Make sure the integer does not overflow. 90 | critical_check_msg(result <= UINT64_MAX - d, "Int too big") 91 | result += d; 92 | (*current)++; 93 | } 94 | 95 | if (isNegative) { 96 | //FIXME: add assertion here. 
97 | return -result; 98 | } 99 | 100 | return result; 101 | } 102 | 103 | char translate_escape(char c) { 104 | switch (c) { 105 | case 'n': 106 | return '\n'; 107 | case 'r': 108 | return '\r'; 109 | case '"': 110 | case '\'': 111 | case '\\': 112 | default: 113 | return c; 114 | } 115 | } 116 | 117 | char parse_char(char **current) { 118 | critical_check(**current == '\''); 119 | (*current)++; 120 | 121 | char c = **current; 122 | if (c == '\\') { 123 | (*current)++; 124 | c = translate_escape(**current); 125 | } 126 | critical_check(**current == '\''); 127 | (*current)++; 128 | 129 | return c; 130 | } 131 | 132 | char* parse_string(char **current) { 133 | critical_check(**current == '"'); 134 | (*current)++; 135 | 136 | char *start = *current; 137 | 138 | size_t length = 0; 139 | int is_escaped = 0; 140 | while (**current != '"' || is_escaped) { 141 | is_escaped = 0; 142 | if (**current == '\\') 143 | is_escaped = 1; 144 | (*current)++; 145 | length++; 146 | } 147 | 148 | (*current)++; 149 | 150 | char *resultString = malloc(length + 1); 151 | is_escaped = 0; 152 | size_t true_length = 0; 153 | for (size_t i = 0; start[i] != '"' || is_escaped; i++) { 154 | char c = start[i]; 155 | if (c == '\\') { 156 | is_escaped = 1; 157 | continue; 158 | } 159 | if (!is_escaped) { 160 | resultString[true_length++] = c; 161 | continue; 162 | } 163 | 164 | resultString[true_length++] = translate_escape(c); 165 | is_escaped = 0; 166 | } 167 | 168 | resultString[true_length + 1] = 0; // Zero termination. 
169 | return resultString; 170 | } 171 | 172 | void skip_whitespace(char **current) { 173 | while (1) { 174 | char c = **current; 175 | if (c != ' ' && c != '\n' && c != '\r') 176 | break; 177 | (*current)++; 178 | } 179 | } 180 | 181 | bool skip_comment(char **current) { 182 | if (**current == '/') { 183 | (*current)++; 184 | if (**current == '/') { 185 | // single line comment 186 | while (**current && **current != '\n') 187 | (*current)++; 188 | critical_check(**current == '\n'); 189 | (*current)++; 190 | return true; 191 | } else if (**current == '*') { 192 | (*current)++; 193 | 194 | // Multiline comment. 195 | while (1) { 196 | while (**current && **current != '*') 197 | (*current)++; 198 | 199 | critical_check_msg(**current != 0, "unterminated comment"); 200 | (*current)++; 201 | if (**current == '/') { 202 | (*current)++; 203 | return true; 204 | } 205 | (*current)++; 206 | 207 | } 208 | } else { 209 | (*current)--; 210 | } 211 | } 212 | return false; 213 | } 214 | 215 | // Skip whitespace or comments. 216 | void skip_gap(char **current) { 217 | // We need to do it like this to be able to handle alternating whitespace and comments. 218 | while (1) { 219 | skip_whitespace(current); 220 | if (!skip_comment(current)) 221 | return; 222 | } 223 | } 224 | 225 | // Variable management code starts here. 226 | #define MAX_VAR_NAME_LENGTH 20 227 | 228 | // local variable or symbol. 229 | typedef struct { 230 | char name[MAX_VAR_NAME_LENGTH]; 231 | union { 232 | uint32_t slot; // Slot offset of local variable. 233 | uint64_t value; // Sym value. 234 | }; 235 | bool is_function; // Function symbols need special handling. 236 | } sym_t; 237 | 238 | typedef struct { 239 | sym_t *global_sym; 240 | bool is_relative; // is this a rel32 or imm64. 
241 | void *result_point; 242 | } reloc_t; 243 | 244 | #define MAX_RELOC_COUNT 5000 245 | reloc_t relocations[MAX_RELOC_COUNT]; 246 | size_t reloc_count = 0; 247 | 248 | #define MAX_VARIABLE 100 249 | sym_t local_syms[MAX_VARIABLE] = {}; 250 | size_t local_sym_count = 0; 251 | 252 | sym_t global_syms[MAX_VARIABLE] = {}; 253 | size_t global_sym_count = 0; 254 | 255 | size_t slotCount = 0; 256 | 257 | // The buffer position of the current loop header. 258 | // Used to implement `continue` statements. 259 | size_t current_loop_header = 0; 260 | sym_t *current_loop_exit = NULL; 261 | 262 | sym_t *function_exit = NULL; 263 | 264 | sym_t* resolve_sym(sym_t *variables, size_t variable_count, char *name) { 265 | size_t len = strlen(name); 266 | critical_check(len < MAX_VAR_NAME_LENGTH); 267 | 268 | for (int i = 0; i < variable_count; i++) { 269 | sym_t *var = &variables[i]; 270 | size_t varLen = strlen(var->name); 271 | if (len != varLen) 272 | continue; 273 | if (memcmp(var->name, name, len) == 0) 274 | return var; 275 | } 276 | return NULL; 277 | } 278 | 279 | sym_t* insert_sym(sym_t *syms, size_t *sym_count, char *name, uint32_t slot) { 280 | critical_check_msg(MAX_VARIABLE > *sym_count, "try increasing MAX_VARIABLE"); 281 | 282 | sym_t *result = &syms[(*sym_count)++]; 283 | *result->name = 0; 284 | 285 | if (name != NULL) { 286 | size_t len = strlen(name); 287 | critical_check(len < MAX_VAR_NAME_LENGTH); 288 | memcpy(result->name, name, len + 1); 289 | } 290 | 291 | result->slot = slot; 292 | return result; 293 | } 294 | 295 | sym_t* define_sym(sym_t *syms, size_t *sym_count, char *name) { 296 | critical_check(name != NULL); 297 | sym_t *var = resolve_sym(syms, *sym_count, name); 298 | if (var != NULL) 299 | return var; 300 | 301 | slot_t slot = slot_alloc(); 302 | // variable has not been defined. 
303 | return insert_sym(syms, sym_count, name, slot); 304 | } 305 | 306 | 307 | void push_reloc_at(sym_t *global, void *point, bool is_relative) { 308 | critical_check_msg(MAX_RELOC_COUNT > reloc_count, "try increasing MAX_RELOC_COUNT"); 309 | reloc_t *reloc = &relocations[reloc_count++]; 310 | reloc->global_sym = global; 311 | reloc->result_point = point; 312 | reloc->is_relative = is_relative; 313 | } 314 | 315 | void push_reloc(sym_t *global) { 316 | push_reloc_at(global, (void*)(result_buffer + result_size), false); 317 | } 318 | 319 | // Called at the end of compilation. 320 | void apply_relocs() { 321 | for (int i = 0; i < reloc_count; i++) { 322 | reloc_t *reloc = &relocations[i]; 323 | // Apply relocation. 324 | if (!reloc->is_relative) 325 | *(uint64_t*)(reloc->result_point) = reloc->global_sym->value; 326 | else 327 | *(uint32_t*)(reloc->result_point) = reloc->global_sym->value - 328 | (size_t)((uint8_t*)reloc->result_point - result_buffer) - 4; 329 | 330 | } 331 | } 332 | 333 | char ident_buffer[MAX_VAR_NAME_LENGTH]; 334 | size_t read_ident(char **current) { 335 | size_t length = 0; 336 | 337 | while (1) { 338 | char c = **current; 339 | if (!isalnum(c) && c != '_') 340 | break; 341 | (*current)++; 342 | 343 | critical_check(length < MAX_VAR_NAME_LENGTH); 344 | 345 | ident_buffer[length++] = c; 346 | } 347 | ident_buffer[length] = 0; 348 | return length; 349 | } 350 | 351 | typedef enum { 352 | RAX = 0, 353 | RCX = 1, 354 | RDX = 2, 355 | RBX = 3, 356 | RSP = 4, 357 | RBP = 5, 358 | RSI = 6, 359 | RDI = 7, 360 | R8 = 8, 361 | R9 = 9, 362 | R10 = 10, 363 | R11 = 11, 364 | R12 = 12, 365 | R13 = 13, 366 | R14 = 14, 367 | R15 = 15 368 | } reg64; 369 | 370 | // @W Make the addressing 64bit. 371 | // @R Extension for the ModR/M reg field. 372 | // @X Extension of the SIB index field. 373 | // @B Extension for the R/M field or opcode reg field. 
374 | void emit_rex(uint8_t w, uint8_t r, uint8_t x, uint8_t b) { 375 | uint8_t result = (0b0100 << 4) | (w << 3) | (r << 2) | (x << 1) | b; 376 | push_int8(result); 377 | } 378 | 379 | // Store a constant value in a register. 380 | void mov_reg_const64(reg64 reg, uint64_t cons) { 381 | uint8_t regNumber = (uint8_t)reg; 382 | emit_rex(1, 0, 0, regNumber > 7); 383 | regNumber &= 0b111; 384 | 385 | // MOV REG64, CONST64 386 | push_int8(0xB8 | regNumber); 387 | push_int64(cons); 388 | } 389 | 390 | void mov_reg_global(reg64 reg, sym_t *global) { 391 | uint8_t regNumber = (uint8_t)reg; 392 | emit_rex(1, 0, 0, regNumber > 7); 393 | regNumber &= 0b111; 394 | 395 | // MOV REG64, CONST64 396 | push_int8(0xB8 | regNumber); 397 | 398 | push_reloc(global); 399 | push_int64(0); 400 | } 401 | 402 | // mod 403 | // 0 [rm] 404 | // 1 [rm + disp8] 405 | // 2 [rm + disp32] 406 | // 3 rm 407 | void emit_modrm(uint8_t mod, uint8_t regop, uint8_t rm) { 408 | uint8_t result = (mod << 6) | (regop << 3) | rm; 409 | push_int8(result); 410 | } 411 | 412 | // Emit modrm that represents slot value from stack. 413 | void modrm_slot(slot_t slot, uint8_t regop) { 414 | emit_modrm(2, regop, RBP); 415 | push_int32(-slot - 0x8); 416 | } 417 | 418 | /// mov [rbp + slot], reg 419 | void mov_slot_reg(slot_t slot, uint8_t reg) { 420 | emit_rex(1, reg > 7, 0, 0); 421 | reg &= 0b111; 422 | 423 | push_int8(0x89); 424 | 425 | modrm_slot(slot, reg); 426 | } 427 | 428 | void mov_reg_reg(uint8_t target, uint8_t source) { 429 | emit_rex(1, source > 7, 0, target > 7); 430 | source &= 0b111; 431 | target &= 0b111; 432 | push_int8(0x89); 433 | emit_modrm(3, source, target); 434 | } 435 | 436 | // Load a constant value into an allocated stack slot. 437 | slot_t mov_slot_const(uint64_t value) { 438 | slot_t slot = slot_alloc(); 439 | // There is no instruction in x86 to store a 64 bit value in modrm, 440 | // we have to use multiple instructions here. 
441 | 442 | // mov rax, int64 443 | mov_reg_const64(RAX, value); 444 | // mov [rbp + slot], rax 445 | mov_slot_reg(slot, RAX); 446 | 447 | return slot; 448 | } 449 | 450 | void mov_reg_slot(slot_t slot, uint8_t reg) { 451 | emit_rex(1, 0, 0, reg > 7); 452 | reg &= 0b111; 453 | 454 | push_int8(0x8B); 455 | modrm_slot(slot, reg); 456 | } 457 | 458 | slot_t copy_slot(slot_t a) { 459 | slot_t result = slot_alloc(); 460 | mov_reg_slot(a, RAX); // mov RAX, [rbp + slot_a] 461 | mov_slot_reg(result, RAX); // mov [rbp + result], RAX 462 | return result; 463 | } 464 | 465 | slot_t add_slots(slot_t slot_a, slot_t slot_b) { 466 | slot_t result = copy_slot(slot_a); 467 | mov_reg_slot(slot_b, RAX); // mov, RAX, [rbp + slot_b] 468 | 469 | // add [rbp + result], RAX 470 | emit_rex(1, 0, 0, 0); 471 | push_int8(0x01); //opcode 472 | modrm_slot(result, RAX); 473 | 474 | return result; 475 | } 476 | 477 | // Compare slots and set flags. 478 | void cmp_slots(slot_t slot_a, slot_t slot_b) { 479 | mov_reg_slot(slot_b, RAX); // mov RAX, [rbp + slot_a] 480 | 481 | // cmp [rbp + slot_a], RAX 482 | emit_rex(1, 0, 0, 0); 483 | push_int8(0x39); // opcode 484 | modrm_slot(slot_a, RAX); 485 | } 486 | 487 | void cmp_const(slot_t a, uint32_t imm) { 488 | //REX.W + 81 /7 id 489 | emit_rex(1, 0, 0, 0); 490 | push_int8(0x81); 491 | modrm_slot(a, 7); 492 | push_int32(imm); 493 | } 494 | 495 | enum condition { 496 | EQUAL, 497 | NOT_EQUAL, 498 | GREATER, 499 | LESS, 500 | GREATER_EQUAL, 501 | LESS_EQUAL 502 | }; 503 | 504 | // Set the value a of a slot based on the value of a comparision. 
505 | int get_compare(enum condition cond) { 506 | slot_t result = mov_slot_const(0); 507 | 508 | push_int8(0x0F); 509 | uint8_t secOp; 510 | switch (cond) { 511 | case EQUAL: 512 | secOp = 0x94; 513 | break; 514 | case NOT_EQUAL: 515 | secOp = 0x95; 516 | break; 517 | case GREATER: 518 | secOp = 0x9f; 519 | break; 520 | case LESS: 521 | secOp = 0x9C; 522 | break; 523 | case GREATER_EQUAL: 524 | secOp = 0x9D; 525 | break; 526 | case LESS_EQUAL: 527 | secOp = 0x9E; 528 | break; 529 | } 530 | push_int8(secOp); 531 | modrm_slot(result, 0); 532 | return result; 533 | } 534 | 535 | slot_t compare_slots(slot_t slot_a, slot_t slot_b, enum condition cond) { 536 | cmp_slots(slot_a, slot_b); 537 | return get_compare(cond); 538 | } 539 | 540 | slot_t sub_slots(slot_t slot_a, slot_t slot_b) { 541 | slot_t result = copy_slot(slot_a); 542 | mov_reg_slot(slot_b, RAX); // mov, RAX, [rbp + slot_b] 543 | 544 | // sub [rbp + result], RAX 545 | emit_rex(1, 0, 0, 0); 546 | push_int8(0x29); //opcode 547 | modrm_slot(result, RAX); 548 | 549 | return result; 550 | } 551 | 552 | slot_t and_slots(slot_t slot_a, slot_t slot_b) { 553 | slot_t result = copy_slot(slot_a); 554 | mov_reg_slot(slot_b, RAX); // mov, RAX, [rbp + slot_b] 555 | 556 | // and [rbp + result], RAX 557 | emit_rex(1, 0, 0, 0); 558 | push_int8(0x21); //opcode 559 | modrm_slot(result, RAX); 560 | 561 | return result; 562 | } 563 | 564 | slot_t or_slots(slot_t slot_a, slot_t slot_b) { 565 | slot_t result = copy_slot(slot_a); 566 | mov_reg_slot(slot_b, RAX); // mov, RAX, [rbp + slot_b] 567 | 568 | // or [rbp + result], RAX 569 | emit_rex(1, 0, 0, 0); 570 | push_int8(0x09); //opcode 571 | modrm_slot(result, RAX); 572 | 573 | return result; 574 | } 575 | 576 | slot_t not_slot(slot_t slot_a) { 577 | slot_t result = copy_slot(slot_a); 578 | emit_rex(1, 0, 0, 0); 579 | push_int8(0xf7); //opcode 580 | modrm_slot(result, 2); 581 | 582 | return result; 583 | } 584 | 585 | // Division and multiplication instruction are special in that 
they require one argument to be in the RAX register. 586 | slot_t div_slots(int slot_a, int slot_b) { 587 | slot_t result = slot_alloc(); 588 | mov_reg_slot(slot_a, RAX); // mov RAX, [rbp + slot_a] 589 | mov_reg_const64(RDX, 0); 590 | emit_rex(1, 0, 0, 0); 591 | 592 | push_int8(0xF7); //idiv opcode 593 | modrm_slot(slot_b, 7); 594 | 595 | mov_slot_reg(result, RAX); 596 | return result; 597 | } 598 | 599 | slot_t mul_slots(int slot_a, int slot_b) { 600 | slot_t result = slot_alloc(); 601 | mov_reg_slot(slot_a, RAX); // mov RAX, [rbp + slot_a] 602 | 603 | emit_rex(1, 0, 0, 0); 604 | 605 | push_int8(0xF7); // imul opcode 606 | modrm_slot(slot_b, 4); 607 | mov_slot_reg(result, RAX); 608 | return result; 609 | } 610 | 611 | void push_stack(uint8_t reg) { 612 | emit_rex(1, 0, 0, reg > 7); 613 | reg &= 0b111; 614 | push_int8(0xFF); 615 | emit_modrm(3, 6, reg); 616 | } 617 | 618 | void pop_stack(uint8_t reg) { 619 | emit_rex(1, 0, 0, reg > 7); 620 | reg &= 0b111; 621 | push_int8(0x8F); 622 | emit_modrm(3, 0, reg); 623 | } 624 | 625 | uint32_t* emit_prolog() { 626 | push_stack(RBP); // save rbp. 627 | mov_reg_reg(RBP, RSP); 628 | 629 | // sub RSP, 630 | emit_rex(1, 0, 0, 0); 631 | push_int8(0x81); 632 | emit_modrm(3, 5, RSP); 633 | 634 | // Result is the offset of the stack size. 635 | uint32_t *result = (uint32_t*)((void*)result_buffer + result_size); 636 | push_int32(0); // place holder. 637 | return result; 638 | } 639 | 640 | void emit_epilog() { 641 | mov_reg_reg(RSP, RBP); // restore rsp. 642 | pop_stack(RBP); // restore rbp 643 | push_int8(0xC3); // ret 644 | } 645 | 646 | slot_t compile_expression(char **current); 647 | 648 | slot_t compile_exp_function_call(char **current, slot_t function) { 649 | critical_check(**current == '('); 650 | (*current)++; 651 | 652 | 653 | slot_t arguments[6]; 654 | size_t argCount = 0; 655 | 656 | reg64 registers[] = { RDI, RSI, RDX, RCX, R8, R9}; 657 | 658 | // Parse arguments. 
659 | while (1) { 660 | skip_gap(current); 661 | critical_check(argCount <= 6 && "FIXME: Support more than 6 args for function calls."); 662 | if (**current == ')') 663 | break; 664 | 665 | arguments[argCount++] = compile_expression(current); 666 | skip_gap(current); 667 | 668 | if (**current == ')') 669 | break; 670 | critical_check(**current == ','); 671 | (*current)++; 672 | } 673 | (*current)++; 674 | 675 | // Move argument slots to call argument registers. 676 | for (size_t i = 0; i < argCount; i++) { 677 | mov_reg_slot(arguments[i], registers[i]); 678 | } 679 | 680 | push_int8(0xFF); // call opcode. 681 | modrm_slot(function, 2); 682 | 683 | // Store the function return value. 684 | slot_t result = slot_alloc(); 685 | mov_slot_reg(result, RAX); 686 | 687 | return result; 688 | } 689 | 690 | // Load the address of the slot. 691 | slot_t load_slot_address(slot_t slot) { 692 | emit_rex(1, 0, 0, 0); 693 | push_int8(0x8D); // LEA 694 | modrm_slot(slot, RAX); 695 | 696 | slot_t result = slot_alloc(); 697 | mov_slot_reg(result, RAX); 698 | 699 | return result; 700 | } 701 | 702 | // Returns the stack slot where the result is stored. 703 | slot_t compile_exp_atom_(char **current) { 704 | 705 | 706 | skip_gap(current); 707 | 708 | char c = **current; 709 | 710 | if (c == '&') { // Reference. 711 | (*current)++; 712 | slot_t atom = compile_exp_atom_(current); 713 | return load_slot_address(atom); 714 | } 715 | 716 | if (c == '~') { // logical not . 717 | (*current)++; 718 | slot_t atom = compile_exp_atom_(current); 719 | return not_slot(atom); 720 | } 721 | 722 | if (c == '*') { // Dereference 723 | (*current)++; 724 | slot_t atom = compile_exp_atom_(current); 725 | mov_reg_slot(atom, RAX); 726 | 727 | //TODO: Move this to a separate function. 
728 | emit_rex(1, 0, 0, 0); 729 | push_int8(0x8B); 730 | emit_modrm(0, RAX, RAX); 731 | 732 | slot_t result = slot_alloc(); 733 | mov_slot_reg(result, RAX); 734 | return result; 735 | } 736 | 737 | // The atom is a constant 738 | if (c >= '0' && c <= '9'){ 739 | int value = read_int(current); 740 | return mov_slot_const(value); 741 | } 742 | 743 | if (c == '-') { 744 | char nc = *(*current + 1); 745 | if (nc >= '0' && nc <= '9') { 746 | int value = read_int(current); 747 | return mov_slot_const(value); 748 | } 749 | // Analytical negation. 750 | critical_check_msg(false, "unimplemented"); 751 | } 752 | 753 | // Parenthesis expression. 754 | // (123 * 123 * ...) 755 | if (c == '(') { 756 | (*current)++; 757 | slot_t result = compile_expression(current); 758 | critical_check(**current == ')'); 759 | (*current)++; 760 | return result; 761 | } 762 | 763 | // function call or variable reference. 764 | if (isalpha(c)) { 765 | read_ident(current); 766 | 767 | sym_t *var = resolve_sym(local_syms, local_sym_count, ident_buffer); 768 | if (var) 769 | return var->slot; 770 | 771 | sym_t *global_sym = resolve_sym(global_syms, global_sym_count, ident_buffer); 772 | if (global_sym) { 773 | mov_reg_global(RAX, global_sym); 774 | slot_t result = slot_alloc(); 775 | mov_slot_reg(result, RAX); 776 | 777 | return result; 778 | } 779 | 780 | uint64_t handle = (uint64_t)dlsym(dlHandle, ident_buffer); 781 | critical_check_msg(handle != 0, "Variable not found"); 782 | 783 | return mov_slot_const(handle); 784 | } 785 | 786 | if (c == '"') { 787 | char *string = parse_string(current); 788 | return mov_slot_const((uint64_t)(void*)string); 789 | } 790 | 791 | if (c == '\'') { 792 | char c = parse_char(current); 793 | return mov_slot_const((uint64_t)c); 794 | } 795 | 796 | return -1; 797 | } 798 | 799 | slot_t compile_exp_atom(char **current) { 800 | slot_t result = compile_exp_atom_(current); 801 | 802 | skip_gap(current); 803 | if (**current == '(') 804 | return 
compile_exp_function_call(current, result); 805 | 806 | return result; 807 | } 808 | 809 | 810 | typedef slot_t (*compile_operator_t)(slot_t a, slot_t b); 811 | 812 | typedef struct { 813 | char *op_string; 814 | size_t precedence; 815 | compile_operator_t compile_op; 816 | } bin_operator_t; 817 | 818 | slot_t equals_slots(slot_t a, slot_t b) { 819 | return compare_slots(a, b, EQUAL); 820 | } 821 | 822 | slot_t not_equals_slots(slot_t a, slot_t b) { 823 | return compare_slots(a, b, NOT_EQUAL); 824 | } 825 | 826 | slot_t greater_slots(slot_t a, slot_t b) { 827 | return compare_slots(a, b, GREATER); 828 | } 829 | 830 | slot_t greater_equal_slots(slot_t a, slot_t b) { 831 | return compare_slots(a, b, GREATER_EQUAL); 832 | } 833 | 834 | slot_t less_slots(slot_t a, slot_t b) { 835 | return compare_slots(a, b, LESS); 836 | } 837 | 838 | slot_t less_equal_slots(slot_t a, slot_t b) { 839 | return compare_slots(a, b, LESS_EQUAL); 840 | } 841 | 842 | slot_t right_shift_slots(slot_t a, slot_t b) { 843 | slot_t result = copy_slot(a); 844 | 845 | mov_reg_slot(b, RCX); 846 | 847 | //SAR [rbp + a], CL 848 | emit_rex(1, 0, 0, 0); 849 | push_int8(0xD3); 850 | modrm_slot(result, 7); 851 | 852 | return result; 853 | } 854 | 855 | slot_t left_shift_slots(slot_t a, slot_t b) { 856 | slot_t result = copy_slot(a); 857 | 858 | mov_reg_slot(b, RCX); 859 | 860 | //SAL [rbp + a], CL 861 | emit_rex(1, 0, 0, 0); 862 | push_int8(0xD3); 863 | modrm_slot(result, 4); 864 | 865 | return result; 866 | 867 | } 868 | 869 | #define MAX_PRECEDENCE 5 870 | 871 | 872 | bin_operator_t bin_operators[] = { 873 | {.op_string="*", .precedence=4, .compile_op=mul_slots}, 874 | {.op_string="/", .precedence=4, .compile_op=div_slots}, 875 | {.op_string="+", .precedence=3, .compile_op=add_slots}, 876 | {.op_string="-", .precedence=3, .compile_op=sub_slots}, 877 | 878 | {.op_string=">>", .precedence=2, .compile_op=right_shift_slots}, 879 | {.op_string="<<", .precedence=2, .compile_op=left_shift_slots}, 880 | 881 | 
{.op_string="==",.precedence=1, .compile_op=equals_slots}, 882 | {.op_string="!=", .precedence=1, .compile_op=not_equals_slots}, 883 | 884 | {.op_string=">=", .precedence=1, .compile_op=greater_equal_slots}, 885 | {.op_string="<=", .precedence=1, .compile_op=less_equal_slots}, 886 | 887 | {.op_string=">", .precedence=1, .compile_op=greater_slots}, 888 | 889 | {.op_string="<", .precedence=1, .compile_op=less_slots}, 890 | 891 | {.op_string="&", .precedence=0, .compile_op=and_slots}, 892 | {.op_string="|", .precedence=0, .compile_op=or_slots}, 893 | }; 894 | 895 | 896 | slot_t compile_expression_(char **current, int precedence) { 897 | skip_gap(current); 898 | if (precedence == MAX_PRECEDENCE) 899 | return compile_exp_atom(current); 900 | 901 | int64_t result = compile_expression_(current, precedence + 1); 902 | 903 | while (1) { 904 | skip_gap(current); 905 | char op = **current; 906 | if (op == 0) 907 | return result; 908 | char nOP = *(*current + 1); 909 | 910 | compile_operator_t compile_op_fun = NULL; 911 | // Go over each operator and check if the current operator 912 | for (size_t i = 0; i < sizeof(bin_operators) / sizeof(bin_operator_t); i++) { 913 | bin_operator_t *operator = &bin_operators[i]; 914 | if (operator->op_string[0] != op) 915 | continue; 916 | if (operator->op_string[1] != 0 && operator->op_string[1] != nOP) 917 | continue; 918 | // Operator found, check precedence. 919 | if (operator->precedence != precedence) 920 | break; 921 | 922 | // Operator and precedence matches. 
923 | compile_op_fun = operator->compile_op; 924 | (*current)++; 925 | if (operator->op_string[1] != 0) 926 | (*current)++; 927 | break; 928 | } 929 | 930 | if (compile_op_fun == NULL) 931 | return result; 932 | 933 | slot_t right = compile_expression_(current, precedence + 1); 934 | result = compile_op_fun(result, right); 935 | } 936 | } 937 | 938 | slot_t compile_expression(char **current) { 939 | return compile_expression_(current, 0); 940 | } 941 | 942 | void* allocate_executable(size_t size) { 943 | void* result = mmap( 944 | NULL, 945 | size, 946 | PROT_READ | PROT_WRITE | PROT_EXEC, 947 | MAP_ANONYMOUS | MAP_PRIVATE, 948 | -1, 949 | 0 950 | ); 951 | 952 | critical_check(result != MAP_FAILED); 953 | return result; 954 | } 955 | 956 | slot_t compile_statement(char **current); 957 | slot_t compile_exp_block(char **current); 958 | 959 | slot_t compile_exp_st_block(char **current) { 960 | skip_gap(current); 961 | if (**current == '{') { 962 | (*current)++; 963 | slot_t result = compile_exp_block(current); 964 | critical_check(**current == '}'); 965 | (*current)++; 966 | return result; 967 | } 968 | return compile_statement(current); 969 | } 970 | 971 | // Jump if zero. 972 | uint32_t* jump_zero_offset() { 973 | push_int8(0x0f); 974 | push_int8(0x84); 975 | uint32_t *jump_offset_point = (uint32_t*)(result_buffer + result_size); 976 | push_int32(0); // will be replaced later. 977 | return jump_offset_point; 978 | } 979 | 980 | uint32_t* jump_offset() { 981 | push_int8(0xE9); 982 | uint32_t *jump_offset_point = (uint32_t*)(result_buffer + result_size); 983 | push_int32(0); // will be replaced later. 984 | return jump_offset_point; 985 | } 986 | 987 | void compile_if(char **current) { 988 | skip_gap(current); 989 | 990 | critical_check(**current == '('); 991 | (*current)++; 992 | // Evaluate the condition. 
993 | slot_t condition = compile_expression(current); 994 | 995 | critical_check(**current == ')'); 996 | (*current)++; 997 | 998 | cmp_const(condition, 0); 999 | // Jump to end of block if the condition is not met. 1000 | uint32_t *jump_offset_point = jump_zero_offset(); 1001 | /* 1002 | push_int8(0x0f); 1003 | push_int8(0x84); 1004 | 1005 | uint32_t *jump_offset_point = (uint32_t*)(result_buffer + result_size); 1006 | push_int32(0); // will be replaced later. 1007 | */ 1008 | uint32_t body_begin = result_size; 1009 | 1010 | compile_exp_st_block(current); 1011 | skip_gap(current); 1012 | char *backup = *current; 1013 | read_ident(current); 1014 | if (strcmp(ident_buffer, "else") == 0) { // Compile else. 1015 | uint32_t *jump_over_else = jump_offset(); // jump over the else if we are in the if block. 1016 | uint32_t else_body_begin = result_size; 1017 | *jump_offset_point = result_size - body_begin; // set the jump over offset. 1018 | compile_exp_st_block(current); 1019 | *jump_over_else = result_size - else_body_begin; 1020 | } else { 1021 | *current = backup; // next token is not else, restore the reader point. 1022 | *jump_offset_point = result_size - body_begin; // set the jump over offset. 1023 | } 1024 | } 1025 | 1026 | void compile_while(char **current) { 1027 | skip_gap(current); 1028 | 1029 | critical_check(**current == '('); 1030 | (*current)++; 1031 | size_t loop_start = result_size; 1032 | 1033 | // Push new loop header. 1034 | size_t old_loop_header = current_loop_header; 1035 | current_loop_header = result_size; 1036 | 1037 | // Push new loop exit symbol. 1038 | sym_t * loop_exit = insert_sym(global_syms, &global_sym_count, NULL, -1); 1039 | sym_t *old_loop_exit = current_loop_exit; 1040 | current_loop_exit = loop_exit; 1041 | 1042 | // Evaluate the condition. 
1043 | slot_t condition = compile_expression(current); 1044 | 1045 | critical_check(**current == ')'); 1046 | (*current)++; 1047 | cmp_const(condition, 0); 1048 | 1049 | uint32_t *jump_offset_point = jump_zero_offset(); 1050 | uint32_t body_begin = result_size; 1051 | 1052 | compile_exp_st_block(current); 1053 | 1054 | // Return to the start of the loop. 1055 | uint32_t *return_jump_offset_point = jump_offset(); 1056 | *return_jump_offset_point = loop_start - result_size; 1057 | *jump_offset_point = result_size - body_begin; 1058 | 1059 | current_loop_exit->value = result_size; 1060 | 1061 | current_loop_header = old_loop_header; // restore loop header position. 1062 | current_loop_exit = old_loop_exit; // restore loop exit symbol. 1063 | } 1064 | 1065 | slot_t compile_statement(char **current) { 1066 | skip_gap(current); 1067 | 1068 | char *begin = *current; 1069 | char c = **current; 1070 | if (c >= 'a' && c <= 'z') { 1071 | read_ident(current); 1072 | if (strcmp(ident_buffer, "if") == 0) { 1073 | compile_if(current); 1074 | return -1; 1075 | } else if (strcmp(ident_buffer, "while") == 0) { 1076 | compile_while(current); 1077 | return -1; 1078 | } else if (strcmp(ident_buffer, "continue") == 0) { 1079 | uint32_t *loop_continue = jump_offset(); 1080 | *loop_continue = current_loop_header - result_size; 1081 | critical_check(**current == ';'); 1082 | (*current)++; 1083 | 1084 | return -1; 1085 | } else if (strcmp(ident_buffer, "break") == 0) { 1086 | // insert a jump to the end of the loop. 
1087 | jump_offset(); 1088 | push_reloc_at(current_loop_exit, ((void*)(result_buffer + result_size - 4)), true); 1089 | 1090 | critical_check(**current == ';'); 1091 | (*current)++; 1092 | 1093 | return -1; 1094 | } else if (strcmp(ident_buffer, "return") == 0) { 1095 | skip_gap(current); 1096 | 1097 | if (**current == ';') { 1098 | (*current)++; 1099 | } else { 1100 | slot_t returnValue = compile_expression(current); 1101 | mov_reg_slot(returnValue, RAX); // load the return value to the return register. 1102 | 1103 | critical_check(**current == ';'); 1104 | (*current)++; 1105 | } 1106 | 1107 | jump_offset(); 1108 | push_reloc_at(function_exit, ((void*)(result_buffer + result_size - 4)), true); 1109 | return -1; 1110 | } else { 1111 | skip_gap(current); 1112 | 1113 | // try to parse variable assignment. 1114 | if (**current == '=') { 1115 | (*current)++; 1116 | sym_t *var = define_sym(local_syms, &local_sym_count, ident_buffer); 1117 | slot_t result = compile_expression(current); 1118 | mov_reg_slot(result, RAX); 1119 | mov_slot_reg(var->slot, RAX); 1120 | critical_check(**current == ';'); 1121 | (*current)++; 1122 | return -1; 1123 | } 1124 | 1125 | // This is a small hack, since our tokenizer does not support look ahead. 1126 | *current = begin; 1127 | } 1128 | } 1129 | 1130 | if (c == '*') { // Memory assignment. 1131 | // *asd = ... 1132 | // 1133 | (*current)++; 1134 | slot_t address = compile_exp_atom(current); 1135 | skip_gap(current); 1136 | 1137 | critical_check(**current == '='); 1138 | (*current)++; 1139 | 1140 | slot_t value = compile_expression(current); 1141 | mov_reg_slot(address, RAX); 1142 | mov_reg_slot(value, RDI); 1143 | 1144 | //TODO: Move this to a seperate function. 
1145 | // mov [rax], rdi 1146 | emit_rex(1, 0, 0, 0); 1147 | push_int8(0x89); 1148 | emit_modrm(0, RDI, RAX); 1149 | critical_check_msg(**current == ';', "Expected semicolon after statement."); 1150 | (*current)++; 1151 | return -1; 1152 | } 1153 | 1154 | 1155 | slot_t result = compile_expression(current); 1156 | critical_check_msg(**current == ';', "Expected semicolon after statement."); 1157 | (*current)++; 1158 | return result; 1159 | } 1160 | 1161 | slot_t compile_exp_block(char **current) { 1162 | slot_t result; 1163 | while (**current != 0 && **current != '}') { 1164 | result = compile_statement(current); 1165 | skip_gap(current); 1166 | } 1167 | return result; 1168 | } 1169 | 1170 | void compile_exp_function_dec(char **current) { 1171 | skip_gap(current); 1172 | critical_check_msg(isalpha(**current), "expected identifier"); 1173 | read_ident(current); 1174 | //define function. 1175 | sym_t *function_sym = define_sym(global_syms, &global_sym_count, ident_buffer); 1176 | function_exit = insert_sym(global_syms, &global_sym_count, NULL, 0); 1177 | 1178 | function_sym->is_function = true; 1179 | function_sym->value = result_size; 1180 | slotCount = 0; // reset stack size. 1181 | local_sym_count = 0; // reset variables. 1182 | 1183 | uint32_t *stack_size_location = emit_prolog(); 1184 | 1185 | skip_gap(current); 1186 | 1187 | critical_check_msg(**current == '(', "Was expecting argument list"); 1188 | (*current)++; 1189 | 1190 | reg64 registers[] = { RDI, RSI, RDX, RCX, R8, R9}; 1191 | 1192 | size_t arg_count = 0; 1193 | // Parse and compile function arguments. 
1194 | while (**current != ')') { 1195 | if (arg_count != 0) { 1196 | skip_gap(current); 1197 | critical_check(**current == ','); 1198 | (*current)++; 1199 | } 1200 | skip_gap(current); 1201 | read_ident(current); 1202 | sym_t *arg = define_sym(local_syms, &local_sym_count, ident_buffer); 1203 | mov_slot_reg(arg->slot, registers[arg_count++]); 1204 | } 1205 | 1206 | critical_check_msg(**current == ')', "Was expecting argument list to be closed."); 1207 | (*current)++; 1208 | 1209 | 1210 | slot_t result_slot = compile_exp_st_block(current); 1211 | 1212 | if (result_slot != -1) 1213 | mov_reg_slot(result_slot, RAX); // result value is stored in RAX 1214 | 1215 | function_exit->value = result_size; 1216 | 1217 | emit_epilog(); 1218 | *stack_size_location = (uint32_t)((peak_stack_size + 15) / 16 * 16); 1219 | } 1220 | 1221 | char* read_file(char *filename) { 1222 | FILE *file = fopen(filename, "r"); 1223 | critical_check_msg(file != NULL, "Couldn't open file"); 1224 | fseek(file, 0, SEEK_END); 1225 | size_t file_size = ftell(file); 1226 | fseek(file, 0, SEEK_SET); 1227 | 1228 | char *buffer = (char *)malloc(file_size + 1); 1229 | fread(buffer, 1, file_size, file); 1230 | buffer[file_size] = 0; 1231 | 1232 | return buffer; 1233 | } 1234 | 1235 | void compile_program(char **current) { 1236 | while (**current != 0) { 1237 | compile_exp_function_dec(current); 1238 | skip_gap(current); 1239 | } 1240 | } 1241 | 1242 | int main(int argc, char *args[]) { 1243 | critical_check_msg(argc >= 2, "Usage: ./simplest_compiler "); 1244 | 1245 | dlHandle = dlopen(0, RTLD_NOW); // needed for dynamically linking. 1246 | 1247 | char *contents = read_file(args[1]); 1248 | 1249 | compile_program(&contents); 1250 | 1251 | // Allocate executable memory, we need to know the memory address before we link the program. 1252 | void *executable = allocate_executable(result_size); 1253 | 1254 | // Compute absolute addresses for locally defined functions. 
1255 | for (int i = 0; i < global_sym_count; i++) { 1256 | if (global_syms[i].is_function) 1257 | global_syms[i].value += (uint64_t)executable; 1258 | } 1259 | 1260 | apply_relocs(); 1261 | memcpy((void*)executable, result_buffer, result_size); 1262 | 1263 | FILE *file = fopen("assembly_output", "wb"); 1264 | fwrite(result_buffer, result_size, 1, file); 1265 | fclose(file); 1266 | 1267 | 1268 | // Execute the main function. 1269 | sym_t *funMain = resolve_sym(global_syms, global_sym_count, "main"); 1270 | void *fun = (void*)funMain->value; 1271 | 1272 | puts("executing code"); 1273 | ((int (*)(uint64_t, char**))(fun))(argc - 1, args + 1); 1274 | 1275 | return 0; 1276 | } 1277 | --------------------------------------------------------------------------------