├── .gitignore ├── .gitmodules ├── LICENSE ├── Makefile ├── README.md ├── arena.h ├── ast.c ├── ast.h ├── ast_node_type.h ├── buffer_util.h ├── codegen.h ├── compile.c ├── compile.h ├── compile_flags.txt ├── compiler.c ├── data_type.h ├── elf.c ├── elf.h ├── elf64.c ├── emu.c ├── examples ├── break-statement.c ├── hello-world.c ├── http.c ├── include │ ├── assert.h │ ├── fcntl.h │ ├── stdarg.h │ ├── stddef.h │ ├── stdio.h │ ├── stdlib.h │ ├── string.h │ ├── sys │ │ └── syscall.h │ ├── time.h │ └── unistd.h ├── infinite-loop-print-time-sleep.c ├── memory-ffi.c ├── printf.c ├── ptr.c ├── read-file-into-buffer.c ├── struct.c ├── syscall.c └── user-input.c ├── imm.h ├── instruction.h ├── lex.c ├── main-ast.c ├── main.c ├── memory.c ├── operand.h ├── parse.c ├── parse.h ├── pe.c ├── pre.c ├── register.h ├── std.h ├── test.c ├── tests ├── exit-code.c ├── precedence.c ├── run.sh └── while-loop.c ├── token.h ├── tools └── dump-opcodes.c ├── types.h ├── util.h ├── virtual_opcodes.h ├── vm.c ├── x64.c └── x86.c /.gitignore: -------------------------------------------------------------------------------- 1 | TAGS 2 | .ccls-cache/ 3 | bin/ 4 | \#*\# 5 | *~ 6 | .vs/ 7 | *.obj 8 | *.exe 9 | *.recipe 10 | *.ipdb 11 | *.log 12 | *.pdb 13 | *.iobj 14 | *.tlog 15 | *.txt -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "rhd"] 2 | path = rhd 3 | url = https://github.com/riicchhaarrd/rhd 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2021, riicchhaarrd 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. 
Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -g -w 3 | 4 | OUT_DIR = bin 5 | .PHONY = all 6 | 7 | all: directories compiler 8 | 9 | pre: parse.c lex.c pre.c 10 | @echo "Building preprocessor" 11 | @$(CC) -m64 $(CFLAGS) -DSTANDALONE parse.c lex.c pre.c -o bin/pre64 12 | 13 | compiler: main.c lex.c ast.c compiler.c x64.c pe.c elf.c pre.c parse.c memory.c 14 | @echo "Building compiler" 15 | @$(CC) -m64 $(CFLAGS) main.c lex.c ast.c compiler.c x64.c pe.c elf.c elf64.c pre.c parse.c memory.c -o bin/ocean64 16 | 17 | ast: main-ast.c lex.c ast.c pre.c parse.c 18 | @echo "Building AST" 19 | @$(CC) -m64 $(CFLAGS) main-ast.c lex.c ast.c pre.c parse.c -o bin/ast64 20 | 21 | directories: ${OUT_DIR} 22 | 23 | ${OUT_DIR}: 24 | @mkdir -p bin 25 | 26 | clean: 27 | @echo "Cleaning up" 28 | rm bin/* 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ocean 2 | Programming language that compiles into an x86 ELF executable. 3 | 4 | The main goal at the moment is to create a C compiler, which can at least compile itself. 5 | 6 | # TODO 7 | In no particular order; items may be planned for the near or far future. Not all of them are definitive, and some may be scrapped if better, more structurally sound suggestions / ideas arise. 8 | 9 | It does not list every piece of C functionality, however small, such as individual statements/expressions, nor what is already implemented or not yet done. I may compile a list of those in the future to keep track of what to work on once most of the functionality you expect from C has been implemented. 
10 | 11 | # Short term 12 | 13 | - [x] basic features of a programming language, conditionals/loops/variables/types/order of precedence e.g (1 + 2) * 3 - 4 / 5 + (6 * 8) 14 | - [ ] standard library 15 | - [ ] other compile targets e.g https://docs.microsoft.com/en-us/windows/win32/debug/pe-format 16 | - [ ] self-hosting 17 | - [ ] a working C compiler (excluding any use of GCC/clang specific features like __attribute__ or such) 18 | 19 | # Long(er) term 20 | - [ ] Add new or borrow from other language(s) features/ideas on top of C, deviate away from just C 21 | 22 | - [ ] defer 23 | - [ ] operator overloading 24 | - [ ] constexpr/consteval from c++ or some #run directive (https://github.com/BSVino/JaiPrimer/blob/master/JaiPrimer.md) 25 | - [ ] learn from other similar projects (e.g https://news.ycombinator.com/item?id=27890888) 26 | - [ ] array of struct to struct of arrays conversion and vice versa https://en.wikipedia.org/wiki/AoS_and_SoA 27 | - [ ] something like lisp ' to pass around expressions without evaluating them 28 | - [ ] builtin string type that keeps track of its capacity/size/hash and when comparing first just compares the hash then if true the actual data or index based strings 29 | - [ ] struct members default values 30 | - [ ] coroutines by either saving entire stack for that function to heap or incorporating some message system notify/waittill (https://github.com/riicchhaarrd/gsc) sort of like sleeping functions that will wake up once you call notify on some key 31 | - [ ] more math operators / integration 32 | - cross/dot operator (e.g a cross b dot c) 33 | 34 | - backtick or other operator e.g math to evaluate math expressions as they are in math (e.g ` or math(a . c x b) either with x . 
or × and ⋅ like sizeof(int)) 35 | 36 | - parse LaTeX (e.g \[ \vec{a}\cdot\vec{b} = |\vec{a}||\vec{b}|\cos\theta \] 37 | $W = \vec{F}\cdot\vec{d} = F\cos\theta d$) 38 | 39 | - integrate some form of a symbolic language like wolfram or error on results that can't be solved/indeterminate/don't make sense in code 40 | - [ ] target other ISA (instruction set architectures) e.g ARM/x86_64 41 | - [ ] better error handling / memory cleanup 42 | - [ ] LLVM or other IR backend (atm just targeting x86 and it's a great learning exercise) 43 | 44 | # Example 45 | 46 | Example of code (may be subject to change) 47 | 48 | ```c 49 | int strlen(const char *s) 50 | { 51 | int i = 0; 52 | while(s[i]) 53 | { 54 | i+=1; 55 | } 56 | return i; 57 | } 58 | 59 | void print(const char *s) 60 | { 61 | write(1, s, strlen(s) + 1); 62 | } 63 | 64 | int main() 65 | { 66 | print("hello, world!\n"); 67 | 68 | char msg[32]; 69 | msg[0] = 'h'; 70 | msg[1] = 'e'; 71 | msg[2] = 'l'; 72 | msg[3] = 'l'; 73 | msg[4] = 'o'; 74 | msg[5] = ','; 75 | msg[6] = ' '; 76 | msg[7] = 'w'; 77 | msg[8] = 'o'; 78 | msg[9] = 'r'; 79 | msg[10] = 'l'; 80 | msg[11] = 'd'; 81 | msg[12] = '!'; 82 | msg[13] = 10; 83 | msg[14] = 0; 84 | 85 | int i; 86 | 87 | for(i = 0; i < 10; i += 1) 88 | { 89 | if(i % 2 ==0) 90 | { 91 | print(msg); 92 | } 93 | } 94 | } 95 | ``` 96 | 97 | Which compiles into (no relocations, when compiling to ELF strings will be relocated and 0xcccccccc would be replaced by the actual location) 98 | 99 | ```asm 100 | mov eax, 0 101 | call eax 102 | xor ebx, ebx 103 | xor eax, eax 104 | inc eax 105 | int 0x80 106 | push ebp 107 | mov ebp, esp 108 | sub esp, 4 109 | mov eax, 0 110 | push eax 111 | lea ebx, [ebp - 4] 112 | pop eax 113 | mov dword [ebx], eax 114 | mov ebx, dword [ebp + 8] 115 | mov eax, dword [ebp - 4] 116 | add ebx, eax 117 | movzx eax, byte [ebx] 118 | test eax, eax 119 | je 0x47 120 | mov eax, 1 121 | push eax 122 | lea ebx, [ebp - 4] 123 | pop eax 124 | add dword [ebx], eax 125 | jmp 0x23 
126 | mov eax, dword [ebp - 4] 127 | mov esp, ebp 128 | pop ebp 129 | ret 130 | mov esp, ebp 131 | pop ebp 132 | ret 133 | push ebp 134 | mov ebp, esp 135 | sub esp, 0 136 | mov eax, 1 137 | push eax 138 | mov eax, dword [ebp + 8] 139 | push eax 140 | mov eax, dword [ebp + 8] 141 | push eax 142 | call 0xe 143 | add esp, 4 144 | push eax 145 | mov eax, 1 146 | mov ecx, eax 147 | pop eax 148 | xor edx, edx 149 | add eax, ecx 150 | mov edx, eax 151 | pop ecx 152 | pop ebx 153 | mov eax, 4 154 | int 0x80 155 | mov esp, ebp 156 | pop ebp 157 | ret 158 | push ebp 159 | mov ebp, esp 160 | sub esp, 0x24 161 | mov eax, 0xcccccccc 162 | push eax 163 | call 0x52 164 | add esp, 4 165 | mov eax, 0x68 166 | push eax 167 | push eax 168 | lea ebx, [ebp - 0x20] 169 | mov eax, 0 170 | add ebx, eax 171 | pop eax 172 | pop eax 173 | mov byte [ebx], al 174 | mov eax, 0x65 175 | push eax 176 | push eax 177 | lea ebx, [ebp - 0x20] 178 | mov eax, 1 179 | add ebx, eax 180 | pop eax 181 | pop eax 182 | mov byte [ebx], al 183 | mov eax, 0x6c 184 | push eax 185 | push eax 186 | lea ebx, [ebp - 0x20] 187 | mov eax, 2 188 | add ebx, eax 189 | pop eax 190 | pop eax 191 | mov byte [ebx], al 192 | mov eax, 0x6c 193 | push eax 194 | push eax 195 | lea ebx, [ebp - 0x20] 196 | mov eax, 3 197 | add ebx, eax 198 | pop eax 199 | pop eax 200 | mov byte [ebx], al 201 | mov eax, 0x6f 202 | push eax 203 | push eax 204 | lea ebx, [ebp - 0x20] 205 | mov eax, 4 206 | add ebx, eax 207 | pop eax 208 | pop eax 209 | mov byte [ebx], al 210 | mov eax, 0x2c 211 | push eax 212 | push eax 213 | lea ebx, [ebp - 0x20] 214 | mov eax, 5 215 | add ebx, eax 216 | pop eax 217 | pop eax 218 | mov byte [ebx], al 219 | mov eax, 0x20 220 | push eax 221 | push eax 222 | lea ebx, [ebp - 0x20] 223 | mov eax, 6 224 | add ebx, eax 225 | pop eax 226 | pop eax 227 | mov byte [ebx], al 228 | mov eax, 0x77 229 | push eax 230 | push eax 231 | lea ebx, [ebp - 0x20] 232 | mov eax, 7 233 | add ebx, eax 234 | pop eax 235 | pop eax 236 | mov 
byte [ebx], al 237 | mov eax, 0x6f 238 | push eax 239 | push eax 240 | lea ebx, [ebp - 0x20] 241 | mov eax, 8 242 | add ebx, eax 243 | pop eax 244 | pop eax 245 | mov byte [ebx], al 246 | mov eax, 0x72 247 | push eax 248 | push eax 249 | lea ebx, [ebp - 0x20] 250 | mov eax, 9 251 | add ebx, eax 252 | pop eax 253 | pop eax 254 | mov byte [ebx], al 255 | mov eax, 0x6c 256 | push eax 257 | push eax 258 | lea ebx, [ebp - 0x20] 259 | mov eax, 0xa 260 | add ebx, eax 261 | pop eax 262 | pop eax 263 | mov byte [ebx], al 264 | mov eax, 0x64 265 | push eax 266 | push eax 267 | lea ebx, [ebp - 0x20] 268 | mov eax, 0xb 269 | add ebx, eax 270 | pop eax 271 | pop eax 272 | mov byte [ebx], al 273 | mov eax, 0x21 274 | push eax 275 | push eax 276 | lea ebx, [ebp - 0x20] 277 | mov eax, 0xc 278 | add ebx, eax 279 | pop eax 280 | pop eax 281 | mov byte [ebx], al 282 | mov eax, 0xa 283 | push eax 284 | push eax 285 | lea ebx, [ebp - 0x20] 286 | mov eax, 0xd 287 | add ebx, eax 288 | pop eax 289 | pop eax 290 | mov byte [ebx], al 291 | mov eax, 0 292 | push eax 293 | push eax 294 | lea ebx, [ebp - 0x20] 295 | mov eax, 0xe 296 | add ebx, eax 297 | pop eax 298 | pop eax 299 | mov byte [ebx], al 300 | mov eax, 0 301 | push eax 302 | lea ebx, [ebp - 0x24] 303 | pop eax 304 | mov dword [ebx], eax 305 | mov eax, dword [ebp - 0x24] 306 | push eax 307 | mov eax, 0xa 308 | mov ecx, eax 309 | pop eax 310 | xor edx, edx 311 | cmp eax, ecx 312 | jge 0x202 313 | xor eax, eax 314 | inc eax 315 | jmp 0x204 316 | xor eax, eax 317 | test eax, eax 318 | je 0x256 319 | mov eax, dword [ebp - 0x24] 320 | push eax 321 | mov eax, 2 322 | mov ecx, eax 323 | pop eax 324 | xor edx, edx 325 | idiv ecx 326 | mov eax, edx 327 | push eax 328 | mov eax, 0 329 | mov ecx, eax 330 | pop eax 331 | xor edx, edx 332 | cmp eax, ecx 333 | jne 0x232 334 | xor eax, eax 335 | inc eax 336 | jmp 0x234 337 | xor eax, eax 338 | cmp eax, 0 339 | je 0x245 340 | lea eax, [ebp - 0x20] 341 | push eax 342 | call 0x52 343 | add esp, 4 344 
| mov eax, 1 345 | push eax 346 | lea ebx, [ebp - 0x24] 347 | pop eax 348 | add dword [ebx], eax 349 | jmp 0x1eb 350 | mov esp, ebp 351 | pop ebp 352 | ret 353 | ``` 354 | -------------------------------------------------------------------------------- /arena.h: -------------------------------------------------------------------------------- 1 | #ifndef SIMPLE_ARENA 2 | #define SIMPLE_ARENA 3 | #include 4 | #include 5 | 6 | typedef struct arena_s 7 | { 8 | const char *tag; 9 | char *data; 10 | size_t reserved; 11 | size_t used; 12 | } arena_t; 13 | 14 | static int arena_create(arena_t **arena_out, const char *tag, size_t n) //one malloc holds the header followed by n data bytes; returns 0 on success, 1 on allocation failure 15 | { 16 | *arena_out = NULL; 17 | char *ptr = malloc(n + sizeof(arena_t)); 18 | if(!ptr) 19 | return 1; 20 | arena_t *a = (arena_t*)ptr; 21 | ptr += sizeof(arena_t); 22 | a->tag = tag; 23 | a->data = ptr; 24 | a->reserved = n; 25 | a->used = 0; 26 | *arena_out = a; 27 | return 0; 28 | } 29 | 30 | static char *arena_alloc(arena_t *a, size_t n) //bump-allocates n bytes; prints a diagnostic and returns NULL when the request does not fit 31 | { 32 | if(n > a->reserved - a->used) //used never exceeds reserved, so this cannot wrap the way used + n could 33 | { 34 | printf("can't allocate %zu bytes, out of memory for arena '%s' size: %zu bytes / %zu KB\n", n, a->tag, a->reserved, a->reserved / 1000); 35 | return NULL; 36 | } 37 | a->used += n; 38 | return &a->data[a->used - n]; 39 | } 40 | 41 | static void arena_destroy(arena_t **a) //frees the arena block and clears the caller's pointer; a NULL argument is ignored 42 | { 43 | if(!a) 44 | return; 45 | free(*a); 46 | *a = NULL; 47 | } 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /ast.h: -------------------------------------------------------------------------------- 1 | #ifndef AST_H 2 | #define AST_H 3 | #include 4 | #include 5 | #include "data_type.h" 6 | #include "arena.h" 7 | #include "parse.h" 8 | 9 | #define ENUM_BEGIN(typ) typedef enum { 10 | #define ENUM(nam) nam 11 | #define ENUM_VALUE(nam, val) nam = val 12 | #define ENUM_END(typ) } typ; 13 | #include "ast_node_type.h" 14 | 15 | #include "types.h" 16 | 17 | #undef ENUM_BEGIN 18 | #undef ENUM 19 | #undef ENUM_VALUE 20 | #undef 
ENUM_END 21 | 22 | #define ENUM_BEGIN(typ) static const char * typ ## _strings[] = { 23 | #define ENUM(nam) #nam 24 | #define ENUM_VALUE(nam, val) #nam 25 | #define ENUM_END( typ ) \ 26 | } \ 27 | ; \ 28 | static const char* typ##_to_string( int i ) \ 29 | { \ 30 | if ( i < 0 || i >= (int)( sizeof( typ##_strings ) / sizeof( typ##_strings[0] ) ) ) /* reject indices outside the generated string table */ \ 31 | return "invalid"; \ 32 | return typ##_strings[i]; \ 33 | } 34 | #include "ast_node_type.h" 35 | 36 | typedef struct ast_node_s ast_node_t; 37 | 38 | #define IDENT_CHARLEN (64) 39 | 40 | typedef enum 41 | { 42 | LITERAL_INTEGER, 43 | LITERAL_NUMBER, 44 | LITERAL_STRING 45 | } ast_literal_type_t; 46 | 47 | typedef struct 48 | { 49 | struct linked_list *body; 50 | } ast_block_stmt_t; 51 | 52 | typedef struct 53 | { 54 | ast_literal_type_t type; 55 | 56 | union 57 | { 58 | char string[IDENT_CHARLEN]; //C's max identifier length is 31 iirc 59 | scalar_t scalar; 60 | integer_t integer; 61 | double vector[4]; 62 | }; 63 | } ast_literal_t; 64 | 65 | typedef struct 66 | { 67 | char name[IDENT_CHARLEN]; 68 | } ast_identifier_t; 69 | 70 | static void print_literal(ast_literal_t* lit) 71 | { 72 | //TODO: FIX non-integers 73 | if(lit->type == LITERAL_INTEGER) 74 | printf("literal %lld\n", lit->integer.value); 75 | else if(lit->type == LITERAL_NUMBER) 76 | printf("literal %Lf\n", lit->scalar.value); 77 | else if(lit->type == LITERAL_STRING) 78 | printf("literal '%s'\n", lit->string); 79 | else 80 | printf("literal ??????\n"); 81 | } 82 | 83 | typedef struct 84 | { 85 | ast_node_t *lhs; 86 | ast_node_t *rhs; 87 | int operator; 88 | } ast_bin_expr_t; 89 | 90 | typedef struct 91 | { 92 | ast_node_t *callee; 93 | ast_node_t *arguments[32]; 94 | int numargs; 95 | } ast_function_call_expr_t; 96 | 97 | typedef struct 98 | { 99 | ast_node_t *argument; 100 | int operator; 101 | int prefix; 102 | } ast_unary_expr_t; 103 | 104 | typedef struct 105 | { 106 | ast_node_t *lhs; 107 | ast_node_t *rhs; 108 | int operator; 109 | } ast_assignment_expr_t; 110 | 111 | typedef struct 112 | { 113 | ast_node_t *expr; 114 
| } ast_expr_stmt_t; 115 | 116 | typedef struct 117 | { 118 | ast_node_t *test; 119 | ast_node_t *consequent; 120 | ast_node_t *alternative; 121 | } ast_if_stmt_t; 122 | 123 | typedef struct 124 | { 125 | ast_node_t *init; 126 | ast_node_t *test; 127 | ast_node_t *update; 128 | ast_node_t *body; 129 | } ast_for_stmt_t; 130 | 131 | typedef struct 132 | { 133 | ast_node_t *test; 134 | ast_node_t *body; 135 | } ast_while_stmt_t; 136 | 137 | typedef struct 138 | { 139 | ast_node_t *test; 140 | ast_node_t *body; 141 | } ast_do_while_stmt_t; 142 | 143 | typedef struct 144 | { 145 | ast_node_t *id; 146 | ast_node_t *parameters[32]; 147 | int numparms; 148 | ast_node_t *body; //no body means just forward declaration, just prototype function 149 | ast_node_t *return_data_type; 150 | int variadic; 151 | //TODO: access same named variables in different scopes 152 | ast_node_t *declarations[64]; //TODO: increase max amount of local variables, for now this'll do 153 | int numdeclarations; 154 | } ast_function_decl_t; 155 | 156 | typedef struct 157 | { 158 | struct linked_list *body; 159 | } ast_program_t; 160 | 161 | typedef struct 162 | { 163 | ast_node_t *argument; 164 | } ast_return_stmt_t; 165 | 166 | typedef struct 167 | { 168 | ast_node_t *object; 169 | ast_node_t *property; 170 | int computed; //unused atm 171 | int as_pointer; 172 | } ast_member_expr_t; 173 | 174 | enum TYPE_QUALIFIER 175 | { 176 | TQ_NONE = 0, 177 | TQ_CONST = 1, 178 | TQ_VOLATILE = 2, 179 | TQ_UNSIGNED = 4 180 | }; 181 | 182 | /* int,char,float,double etc...*/ 183 | typedef struct 184 | { 185 | int primitive_type; 186 | int qualifiers; 187 | } ast_primitive_t; 188 | 189 | //TODO: FIXME rename 190 | //maybe name is too generic? 
191 | typedef struct 192 | { 193 | ast_node_t *data_type; 194 | int qualifiers; 195 | int array_size; 196 | } ast_data_type_t; 197 | 198 | typedef struct 199 | { 200 | char name[IDENT_CHARLEN]; 201 | ast_node_t* fields[32]; // TODO: increase N 202 | int numfields; 203 | } ast_struct_decl_t; 204 | 205 | typedef struct 206 | { 207 | ast_node_t *id; 208 | ast_node_t *data_type; 209 | ast_node_t *initializer_value; 210 | } ast_variable_decl_t; 211 | 212 | typedef struct 213 | { 214 | int opcode; 215 | } ast_emit_t; 216 | 217 | typedef struct 218 | { 219 | ast_node_t *subject; 220 | } ast_sizeof_t; 221 | 222 | typedef struct 223 | { 224 | ast_node_t *condition; 225 | ast_node_t *consequent; 226 | ast_node_t *alternative; 227 | } ast_ternary_expr_t; 228 | 229 | typedef struct 230 | { 231 | //maybe add break level, nested loops 232 | //keep track of which loop node we're in maybe 233 | int unused; 234 | } ast_break_stmt_t; 235 | 236 | typedef struct 237 | { 238 | ast_node_t *expr[16]; //TODO: increase N 239 | int numexpr; 240 | } ast_seq_expr_t; 241 | 242 | typedef struct 243 | { 244 | ast_node_t *type; 245 | ast_node_t *expr; 246 | } ast_cast_t; 247 | 248 | 249 | //typedef node 250 | //typedef unsigned char BYTE; 251 | 252 | typedef struct 253 | { 254 | char name[IDENT_CHARLEN]; 255 | ast_node_t *type; 256 | } ast_typedef_t; 257 | 258 | // enum node 259 | // enum colors { red, green, blue }; 260 | 261 | typedef struct 262 | { 263 | char name[IDENT_CHARLEN]; //enum name 264 | ast_node_t* values[32]; //holds the identifiers (ast_identifier) the enum value is the index 265 | int numvalues; 266 | } ast_enum_t; 267 | 268 | typedef struct 269 | { 270 | char ident[IDENT_CHARLEN]; 271 | int value; 272 | } ast_enum_value_t; 273 | 274 | struct ast_node_s 275 | { 276 | ast_node_t *parent; 277 | ast_node_type_t type; 278 | int start, end; 279 | int rvalue; 280 | union 281 | { 282 | ast_block_stmt_t block_stmt_data; 283 | ast_bin_expr_t bin_expr_data; 284 | ast_literal_t 
literal_data; 285 | ast_expr_stmt_t expr_stmt_data; 286 | ast_unary_expr_t unary_expr_data; 287 | ast_assignment_expr_t assignment_expr_data; 288 | ast_identifier_t identifier_data; 289 | ast_function_call_expr_t call_expr_data; 290 | ast_if_stmt_t if_stmt_data; 291 | ast_for_stmt_t for_stmt_data; 292 | ast_while_stmt_t while_stmt_data; 293 | ast_do_while_stmt_t do_while_stmt_data; 294 | ast_function_decl_t func_decl_data; 295 | ast_program_t program_data; 296 | ast_return_stmt_t return_stmt_data; 297 | ast_member_expr_t member_expr_data; 298 | ast_variable_decl_t variable_decl_data; 299 | ast_primitive_t primitive_data; 300 | ast_emit_t emit_data; 301 | ast_sizeof_t sizeof_data; 302 | ast_ternary_expr_t ternary_expr_data; 303 | ast_break_stmt_t break_stmt_data; 304 | ast_seq_expr_t seq_expr_data; 305 | ast_cast_t cast_data; 306 | ast_data_type_t data_type_data; 307 | ast_struct_decl_t struct_decl_data; 308 | ast_typedef_t typedef_data; 309 | ast_enum_t enum_data; 310 | ast_enum_value_t enum_value_data; 311 | }; 312 | }; 313 | 314 | static void ast_print_node_type(const char* key, ast_node_t* n) 315 | { 316 | printf("node type: %s -> %s\n", key, ast_node_type_t_to_string(n->type)); 317 | } 318 | 319 | struct ast_context 320 | { 321 | arena_t *allocator; 322 | ast_node_t *program_node; 323 | ast_node_t *function; 324 | ast_node_t *default_function; 325 | struct hash_map *type_definitions; 326 | int numtypes; 327 | 328 | int verbose; 329 | 330 | struct parse_context parse_context; 331 | jmp_buf jmp; 332 | }; 333 | 334 | typedef struct ast_context ast_context_t; 335 | void ast_init_context(ast_context_t *ctx, arena_t *allocator); 336 | bool ast_process_tokens(ast_context_t*, struct token* tokens, int num_tokens); 337 | 338 | //TODO: refactor traverse_context name to ast 339 | 340 | typedef int (*traversal_fn_t)(ast_node_t*, void*); 341 | 342 | typedef struct 343 | { 344 | jmp_buf jmp; 345 | traversal_fn_t visitor; 346 | void* userdata; 347 | size_t visiteestacksize; 
348 | ast_node_t* visiteestack[8]; 349 | int single_result; 350 | int overflow; 351 | ast_node_t **results; 352 | size_t maxresults, numresults; 353 | } traverse_context_t; 354 | 355 | ast_node_t* ast_tree_traverse(traverse_context_t* ctx, ast_node_t* head, traversal_fn_t visitor, void* userdata); 356 | ast_node_t* ast_tree_node_by_type(traverse_context_t* ctx, ast_node_t* head, int type); 357 | ast_node_t* ast_tree_node_by_identifier(traverse_context_t* ctx, ast_node_t* head, const char* id, int type); 358 | ast_node_t* ast_tree_traverse_get_visitee(traverse_context_t* ctx, size_t index); 359 | size_t ast_tree_nodes_by_type(traverse_context_t* ctx, ast_node_t* head, int type, ast_node_t** results, size_t maxresults); 360 | ast_node_t* ast_tree_node_by_node(traverse_context_t* ctx, ast_node_t* head, ast_node_t* node); 361 | 362 | #endif 363 | -------------------------------------------------------------------------------- /ast_node_type.h: -------------------------------------------------------------------------------- 1 | ENUM_BEGIN(ast_node_type_t) 2 | ENUM(AST_NONE), 3 | ENUM(AST_IDENTIFIER), 4 | ENUM(AST_LITERAL), 5 | ENUM(AST_UNARY_EXPR), 6 | ENUM(AST_BIN_EXPR), 7 | ENUM(AST_TERNARY_EXPR), 8 | ENUM(AST_EXPR_STMT), 9 | ENUM(AST_ASSIGNMENT_EXPR), 10 | ENUM(AST_FUNCTION_CALL_EXPR), 11 | ENUM(AST_IF_STMT), 12 | ENUM(AST_FOR_STMT), 13 | ENUM(AST_WHILE_STMT), 14 | ENUM(AST_DO_WHILE_STMT), 15 | ENUM(AST_BLOCK_STMT), 16 | ENUM(AST_FUNCTION_DECL), 17 | ENUM(AST_PROGRAM), 18 | ENUM(AST_RETURN_STMT), 19 | ENUM(AST_MEMBER_EXPR), 20 | ENUM(AST_STRUCT_MEMBER_EXPR), 21 | ENUM(AST_VARIABLE_DECL), 22 | ENUM(AST_PRIMITIVE), 23 | ENUM(AST_ARRAY_DATA_TYPE), 24 | ENUM(AST_POINTER_DATA_TYPE), 25 | ENUM(AST_STRUCT_DATA_TYPE), 26 | ENUM(AST_STRUCT_DECL), 27 | ENUM(AST_UNION_DECL), 28 | ENUM(AST_SIZEOF), 29 | ENUM(AST_EMIT), 30 | ENUM(AST_BREAK_STMT), 31 | ENUM(AST_SEQ_EXPR), 32 | ENUM(AST_CAST), 33 | ENUM(AST_EMPTY), 34 | ENUM(AST_TYPEDEF), 35 | ENUM(AST_DATA_TYPE), 36 | 
ENUM(AST_EXIT), 37 | ENUM(AST_ENUM), 38 | ENUM(AST_ENUM_VALUE), 39 | ENUM_VALUE(AST_INVALID, -1) 40 | ENUM_END(ast_node_type_t) 41 | -------------------------------------------------------------------------------- /buffer_util.h: -------------------------------------------------------------------------------- 1 | #ifndef BUFFER_UTIL_H 2 | #define BUFFER_UTIL_H 3 | 4 | #include "compile.h" 5 | 6 | static int instruction_position(compiler_t *ctx) 7 | { 8 | return heap_string_size(&ctx->function->bytecode); 9 | } 10 | 11 | static void dd(compiler_t *ctx, u32 i) //appends the four bytes of i to the current function's bytecode in host byte order 12 | { 13 | union 14 | { 15 | uint32_t i; 16 | uint8_t b[4]; 17 | } u = { .i = i }; 18 | 19 | for(size_t k = 0; k < 4; ++k) 20 | heap_string_push(&ctx->function->bytecode, u.b[k]); 21 | } 22 | 23 | static void dw(compiler_t *ctx, u16 i) 24 | { 25 | union 26 | { 27 | uint16_t s; 28 | uint8_t b[2]; 29 | } u = { .s = i }; 30 | 31 | heap_string_push(&ctx->function->bytecode, u.b[0]); 32 | heap_string_push(&ctx->function->bytecode, u.b[1]); 33 | } 34 | 35 | static void db(compiler_t *ctx, u8 op) 36 | { 37 | heap_string_push(&ctx->function->bytecode, op); 38 | } 39 | 40 | static void set8(compiler_t *ctx, int offset, u8 op) 41 | { 42 | ctx->function->bytecode[offset] = op; 43 | } 44 | 45 | static void set32(compiler_t *ctx, int offset, u32 value) //overwrites four already-emitted bytes at offset; copied bytewise (same layout as dd) because offset may not be 4-byte aligned 46 | { 47 | union { uint32_t i; uint8_t b[4]; } u = { .i = value }; 48 | for(size_t k = 0; k < 4; ++k) ctx->function->bytecode[offset + k] = u.b[k]; 49 | } 50 | 51 | static void buf(compiler_t *ctx, const char *buf, size_t len) 52 | { 53 | for(size_t i = 0; i < len; ++i) 54 | { 55 | heap_string_push(&ctx->function->bytecode, buf[i] & 0xff); 56 | } 57 | } 58 | #endif -------------------------------------------------------------------------------- /codegen.h: -------------------------------------------------------------------------------- 1 | #ifndef CODEGEN_H 2 | #define CODEGEN_H 3 | 4 | typedef enum vreg_s vreg_t; 5 | typedef int reg_t; 6 | typedef struct compiler_s compiler_t; 7 | typedef struct reljmp_s reljmp_t; 8 | 9 | typedef struct 
reginfo_s 10 | { 11 | const char *name; 12 | int id; 13 | int bits; 14 | int slot; 15 | //TODO: FIXME should probably later be a stack of vreg values 16 | int usecount; 17 | } reginfo_t; 18 | 19 | typedef struct lvalue_s 20 | { 21 | int offset_type; //e.g global variable memory address, stack offset 22 | int offset; 23 | struct ast_node *data_type; 24 | int size; 25 | } lvalue_t; 26 | 27 | struct codegen_s 28 | { 29 | reginfo_t *reginfo; 30 | size_t numreginfo; 31 | 32 | reg_t (*map_register)(compiler_t *ctx, vreg_t); 33 | void (*unmap_register)(compiler_t *ctx, reg_t); 34 | const char *(*register_name)(compiler_t *ctx, reg_t); 35 | 36 | //---------------------------------------- 37 | reg_t (*add)(compiler_t* ctx, reg_t a, reg_t b); 38 | reg_t (*sub)(compiler_t* ctx, reg_t a, reg_t b); 39 | reg_t (*mod)(compiler_t* ctx, reg_t a, reg_t b); 40 | reg_t (*imul)(compiler_t* ctx, reg_t reg); 41 | reg_t (*idiv)(compiler_t* ctx, reg_t reg); 42 | reg_t (*add_imm8_to_r32)(compiler_t* ctx, reg_t a, u8 value); 43 | reg_t (*add_imm32_to_r32)(compiler_t* ctx, reg_t a, u32 value); 44 | reg_t (*inc)(compiler_t* ctx, reg_t reg); 45 | reg_t (*neg)(compiler_t* ctx, reg_t reg); 46 | reg_t (*sub_regn_imm32)(compiler_t* ctx, reg_t reg, i32 imm); 47 | 48 | //---------------------------------------- 49 | reg_t (*xor)(compiler_t* ctx, reg_t a, reg_t b); 50 | reg_t (*and)(compiler_t* ctx, reg_t a, reg_t b); 51 | reg_t (*or)(compiler_t* ctx, reg_t a, reg_t b); 52 | 53 | //---------------------------------------- 54 | void (*int3)(compiler_t *ctx); 55 | void (*nop)(compiler_t* ctx); 56 | void (*invoke_syscall)(compiler_t* ctx, struct ast_node** args, int numargs); 57 | void (*exit_instr)(compiler_t* ctx, reg_t reg); 58 | //void (*int_imm8)(compiler_t *ctx, u8 value); //don't expose directly, just use invoke_syscall and other functions 59 | 60 | //---------------------------------------- 61 | void (*push)(compiler_t *ctx, reg_t reg); 62 | void (*pop)(compiler_t *ctx, reg_t reg); 63 | void 
(*load_reg)(compiler_t* ctx, reg_t a, reg_t b); 64 | void (*store_reg)(compiler_t* ctx, reg_t a, reg_t b); 65 | void (*load_regn_base_offset_imm32)(compiler_t* ctx, reg_t reg, i32 imm); 66 | 67 | //---------------------------------------- 68 | void (*ret)(compiler_t* ctx); 69 | int (*indirect_call_imm32)(compiler_t* ctx, intptr_t loc, int* address_loc); 70 | void (*call_imm32)(compiler_t* ctx, int loc); 71 | void (*call_r32)(compiler_t* ctx, reg_t reg); 72 | 73 | //---------------------------------------- 74 | void (*mov_r_imm32)(compiler_t* ctx, reg_t reg, i32 imm, int* data_loc); 75 | void (*mov_r_string)(compiler_t* ctx, reg_t reg, const char* str); 76 | reg_t(*mov)(compiler_t* ctx, reg_t a, reg_t b); 77 | 78 | //---------------------------------------- 79 | void (*cmp)(compiler_t* ctx, reg_t a, reg_t b); 80 | void (*test)(compiler_t* ctx, reg_t a, reg_t b); 81 | int (*if_beg)(compiler_t* ctx, reg_t a, int operator, reg_t b, int* offset); 82 | int (*if_else)(compiler_t* ctx, int* offset); 83 | int (*if_end)(compiler_t* ctx, int* offset); 84 | void (*jmp_begin)(compiler_t* ctx, reljmp_t* jmp, int type); 85 | void (*jmp_end)(compiler_t* ctx, reljmp_t* jmp); 86 | 87 | //---------------------------------------- 88 | int (*add_data)(compiler_t* ctx, void* data, u32 data_size); 89 | 90 | void (*load_value_offset_from_stack_to_register)(compiler_t *, reg_t reg, int offset, int data_size); 91 | void (*load_lvalue_address_to_register)(compiler_t*,reg_t,lvalue_t*); 92 | void (*store_value_offset_from_register_to_stack)(compiler_t *, reg_t reg, int offset, int data_size); 93 | }; 94 | 95 | typedef struct codegen_s codegen_t; 96 | 97 | #endif -------------------------------------------------------------------------------- /compile.h: -------------------------------------------------------------------------------- 1 | #ifndef COMPILE_H 2 | #define COMPILE_H 3 | 4 | #include "types.h" 5 | #include "rhd/heap_string.h" 6 | #include "data_type.h" 7 | #include "rhd/hash_string.h" 
8 | #include "rhd/hash_map.h" 9 | #include "arena.h" 10 | #include "instruction.h" 11 | #include "register.h" 12 | #include "virtual_opcodes.h" 13 | #include 14 | 15 | enum 16 | { 17 | VREG_SP, 18 | VREG_BP, 19 | VREG_IP, 20 | VREG_RETURN_VALUE, 21 | VREG_MAX 22 | }; 23 | typedef int vreg_t; 24 | typedef int reg_t; 25 | 26 | typedef struct 27 | { 28 | int nbits; 29 | int indexed; 30 | union 31 | { 32 | int64_t dq; 33 | int32_t dd[2]; 34 | int16_t dw[4]; 35 | int8_t db[8]; 36 | }; 37 | } vregval_t; 38 | 39 | static void setvregval(vregval_t *rv, int i) 40 | { 41 | rv->indexed = 0; 42 | rv->nbits = 32; 43 | rv->dd[0] = i; 44 | } 45 | 46 | static int getvregval(vregval_t *rv) 47 | { 48 | switch(rv->nbits) 49 | { 50 | case 32: 51 | return rv->dd[0]; 52 | case 64: 53 | return rv->dq; 54 | case 8: 55 | return rv->db[0]; 56 | case 16: 57 | return rv->dw[0]; 58 | } 59 | } 60 | 61 | static void setvregvalindex(vregval_t *rv, int index) 62 | { 63 | rv->indexed = 1; 64 | rv->nbits = 32; 65 | rv->dd[0] = index; 66 | } 67 | 68 | typedef struct variable_s 69 | { 70 | int offset; 71 | int is_param; 72 | struct ast_node_s *data_type_node; 73 | } variable_t; 74 | 75 | #define FUNCTION_NAME_MAX_CHARACTERS (64) 76 | #define VOPCACHE_MAX (64) 77 | 78 | typedef struct 79 | { 80 | voperand_t key, value; 81 | } vopcache_t; 82 | 83 | typedef struct function_s 84 | { 85 | char name[FUNCTION_NAME_MAX_CHARACTERS]; 86 | 87 | struct hash_map *arguments; 88 | struct hash_map *variables; 89 | int localvariablesize; 90 | 91 | heap_string bytecode; 92 | 93 | vinstr_t *instructions; 94 | size_t instruction_index; 95 | size_t index; 96 | 97 | /* vinstr_t *returns[32]; */ 98 | /* size_t numreturns; */ 99 | 100 | vopcache_t vopcache[VOPCACHE_MAX]; 101 | size_t vopcacheindex; 102 | voperand_t eoflabel; 103 | int argcost; 104 | int returnsize; 105 | } function_t; 106 | 107 | #define FUNCTION_MAX_INSTRUCTIONS (256) 108 | 109 | struct reljmp_s 110 | { 111 | i32 data_index; 112 | i32 ip; 113 | int type; 
114 | }; 115 | typedef struct reljmp_s reljmp_t; 116 | 117 | #define RJ_JNZ (1) 118 | #define RJ_JZ (2) 119 | 120 | #define RJ_JNE (1) 121 | #define RJ_JE (2) 122 | #define RJ_JL (4) 123 | #define RJ_JLE (8) 124 | #define RJ_JG (16) 125 | #define RJ_JGE (32) 126 | #define RJ_JMP (64) 127 | #define RJ_REVERSE (1<<30) 128 | 129 | typedef struct 130 | { 131 | /* vinstr_t *breaks[32]; */ 132 | /* size_t maxbreaks; */ 133 | /* size_t numbreaks; */ 134 | voperand_t breaklabel; 135 | } scope_t; 136 | 137 | enum RELOC_TYPE 138 | { 139 | RELOC_CODE, 140 | RELOC_DATA, 141 | RELOC_IMPORT 142 | }; 143 | 144 | struct relocation 145 | { 146 | enum RELOC_TYPE type; 147 | size_t size; 148 | intptr_t from; 149 | intptr_t to; 150 | }; 151 | 152 | enum BUILD_TARGET 153 | { 154 | BT_UNKNOWN, 155 | BT_LINUX_X86, 156 | BT_LINUX_X64, 157 | BT_WIN32, 158 | BT_WIN64, 159 | BT_MEMORY, 160 | BT_OPCODES 161 | }; 162 | 163 | static const char* build_target_strings[] = { "Unknown", "Linux ELF x86", "Linux ELF x64", "Win32", "Win64", "Memory", "Opcodes", NULL}; 164 | 165 | struct dynlib_sym 166 | { 167 | const char* lib_name; 168 | const char* sym_name; 169 | intptr_t offset; //TODO: FIXME if and when we ever compile for cross platform or x86/x64 this should probably be changed to match the target binary format. 
170 | hash_t hash; 171 | }; 172 | 173 | typedef struct dynlib_sym* (*find_import_fn_t)(void *userptr, const char *key); 174 | 175 | typedef struct 176 | { 177 | int index; 178 | size_t length; 179 | char *buffer; 180 | } indexed_data_t; 181 | 182 | #define MAX_INDEXED_DATA 256 //TODO: dynamically increase 183 | 184 | typedef struct 185 | { 186 | //size in bits of each type 187 | int longsize; 188 | int intsize; 189 | int shortsize; 190 | int charsize; 191 | int floatsize; 192 | int doublesize; 193 | int pointersize; 194 | } fundamental_type_size_t; 195 | 196 | #define COMPILER_MAX_FUNCTIONS (64) 197 | #define COMPILER_MAX_SCOPES (16) 198 | 199 | typedef enum 200 | { 201 | COMPILER_FLAGS_NONE, 202 | COMPILER_FLAGS_ALU_THREE_OPERANDS, 203 | COMPILER_FLAGS_INDIRECT_ADDRESSING 204 | } compiler_flags_t; 205 | 206 | struct compiler_s 207 | { 208 | arena_t *allocator; 209 | int numbits; 210 | int flags; 211 | 212 | jmp_buf jmp; 213 | 214 | int build_target; 215 | u32 entry; 216 | heap_string data; 217 | 218 | struct linked_list *relocations; 219 | 220 | fundamental_type_size_t fts; 221 | 222 | heap_string instr; 223 | 224 | function_t *function; 225 | 226 | indexed_data_t indexed_data[MAX_INDEXED_DATA]; 227 | int numindexeddata; 228 | 229 | scope_t *scope[COMPILER_MAX_SCOPES]; //TODO: N number of scopes, dynamic array / stack 230 | int scope_index; 231 | 232 | void* find_import_fn_userptr; 233 | find_import_fn_t find_import_fn; 234 | int (*rvalue)(struct compiler_s *ctx, vreg_t reg, struct ast_node *n); 235 | int (*lvalue)(struct compiler_s *ctx, vreg_t reg, struct ast_node *n); 236 | 237 | void (*print)(struct compiler_s *ctx, const char *fmt, ...); 238 | 239 | size_t numfunctions; 240 | struct hash_map *functions; 241 | 242 | size_t vregindex; 243 | size_t labelindex; 244 | }; 245 | 246 | typedef struct compiler_s compiler_t; 247 | 248 | int add_indexed_data(compiler_t *ctx, const void *buffer, size_t len); 249 | function_t *compiler_alloc_function(compiler_t *ctx, 
const char *name); 250 | void compiler_init(compiler_t *c, arena_t *allocator, int numbits, compiler_flags_t flags); 251 | #endif 252 | -------------------------------------------------------------------------------- /compile_flags.txt: -------------------------------------------------------------------------------- 1 | -g 2 | -m32 3 | -------------------------------------------------------------------------------- /data_type.h: -------------------------------------------------------------------------------- 1 | #ifndef DATA_TYPE 2 | #define DATA_TYPE 3 | #include 4 | 5 | enum DATA_TYPE 6 | { 7 | DT_CHAR, 8 | DT_SHORT, 9 | DT_INT, 10 | DT_FLOAT, 11 | DT_DOUBLE, 12 | DT_VOID, 13 | DT_LONG 14 | }; 15 | 16 | static const char* data_type_strings[] = { 17 | "char", "short", "int", "float", "double", "void", "long", NULL 18 | }; 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /elf.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "compile.h" 7 | #include "rhd/std.h" 8 | #include "rhd/linked_list.h" 9 | #include "util.h" 10 | #include "elf.h" 11 | 12 | int build_elf_image(compiler_t *ctx, const char *binary_path) 13 | { 14 | assert(sizeof(struct phdr32) == 0x20); 15 | heap_string instr = ctx->instr; 16 | heap_string data_buf = ctx->data; 17 | heap_string image = NULL; 18 | db(&image, 0x7f); 19 | db(&image, 'E'); 20 | db(&image, 'L'); 21 | db(&image, 'F'); 22 | db(&image, 1); 23 | db(&image, 1); 24 | db(&image, 1); 25 | db(&image, 0); 26 | 27 | for(size_t i = 0; i < 8; ++i) 28 | db(&image, 0); 29 | dw(&image, 2); //e_type 30 | dw(&image, 3); //e_machine 31 | dd(&image, 1); //e_version 32 | 33 | size_t entry_offset = heap_string_size(&image); 34 | dd(&image, 0); //we'll fill this in later //e_entry 35 | dd(&image, 0x34); //e_phoff 36 | 37 | dd(&image, 0); //e_shoff 38 | 39 | dd(&image, 0); //e_flags 40 | dw(&image, 
0x34); //e_ehsize 41 | dw(&image, sizeof(struct phdr32)); //e_phentsize 42 | 43 | u32 num_program_headers = 2; 44 | if(ctx->data) 45 | ++num_program_headers; 46 | 47 | dw(&image, num_program_headers); //e_phnum //amount of program headers 48 | dw(&image, 0); //e_shentsize 49 | dw(&image, 0); //e_shnum 50 | dw(&image, 0); //e_shstrndx 51 | 52 | #define ORG (0x08048000) 53 | #define ALIGNMENT (0x1000) 54 | 55 | int phdr_offset = heap_string_size(&image); 56 | 57 | int null_hdr_offset = heap_string_size(&image); 58 | pad(&image, sizeof(struct phdr32)); 59 | int text_hdr_offset = heap_string_size(&image); 60 | pad(&image, sizeof(struct phdr32)); 61 | 62 | int data_hdr_offset = heap_string_size(&image); 63 | if(ctx->data) 64 | pad(&image, sizeof(struct phdr32)); 65 | 66 | int phdr_end = heap_string_size(&image); 67 | 68 | //get pointers now because image pointer is different than before after the reallocation 69 | struct phdr32 *null_hdr = (struct phdr32*)&image[null_hdr_offset]; 70 | 71 | null_hdr->p_type = PT_LOAD; 72 | null_hdr->p_offset = 0; 73 | null_hdr->p_vaddr = ORG; 74 | null_hdr->p_paddr = ORG; 75 | null_hdr->p_filesz = phdr_end; 76 | null_hdr->p_memsz = phdr_end; 77 | null_hdr->p_flags = PF_R; 78 | null_hdr->p_align = ALIGNMENT; 79 | 80 | u32 il = heap_string_size(&instr); 81 | 82 | size_t entry = heap_string_size(&image); 83 | *(uint32_t*)(image + entry_offset) = ORG + ALIGNMENT; 84 | 85 | pad_align(&image, ALIGNMENT); 86 | 87 | u32 code_offset = heap_string_size(&image); 88 | 89 | struct phdr32 *text_hdr = (struct phdr32*)&image[text_hdr_offset]; 90 | text_hdr->p_type = PT_LOAD; 91 | text_hdr->p_offset = code_offset; 92 | text_hdr->p_vaddr = ORG + ALIGNMENT; 93 | text_hdr->p_paddr = ORG + ALIGNMENT; 94 | text_hdr->p_filesz = il; 95 | text_hdr->p_memsz = il; 96 | text_hdr->p_flags = PF_R | PF_X; 97 | text_hdr->p_align = ALIGNMENT; 98 | 99 | //put .data section here 100 | if(ctx->data) 101 | { 102 | int vaddr = ORG + ALIGNMENT + il; 103 | vaddr += 
align_to(vaddr, ALIGNMENT); 104 | 105 | //relocate everything to vaddr 106 | linked_list_reversed_foreach(ctx->relocations, struct relocation*, it, 107 | { 108 | if(it->type == RELOC_DATA) 109 | { 110 | *(u32*)&instr[it->from] = it->to + vaddr; 111 | printf("[DATA] relocating %d bytes from %02X to %02X\n", it->size, it->from, it->to + vaddr); 112 | } 113 | else if(it->type == RELOC_CODE) 114 | { 115 | *(u32*)&instr[it->from] = it->to + (ORG + ALIGNMENT); 116 | printf("[CODE] relocating %d bytes from %02X to %02X\n", it->size, it->from, it->to + ORG); 117 | } else 118 | { 119 | printf("unknown relocation type %d\n", it->type); 120 | } 121 | }); 122 | 123 | for(int i = 0; i < il; ++i) 124 | { 125 | db(&image, instr[i]); 126 | } 127 | 128 | pad_align(&image, ALIGNMENT); 129 | 130 | u32 data_offset = heap_string_size(&image); 131 | u32 dl = heap_string_size(&data_buf); 132 | 133 | struct phdr32 *data_hdr = (struct phdr32*)&image[data_hdr_offset]; 134 | data_hdr->p_type = PT_LOAD; 135 | data_hdr->p_offset = data_offset; 136 | data_hdr->p_vaddr = vaddr; 137 | data_hdr->p_paddr = vaddr; 138 | data_hdr->p_filesz = dl; 139 | data_hdr->p_memsz = dl; 140 | data_hdr->p_flags = PF_R | PF_W; 141 | data_hdr->p_align = ALIGNMENT; 142 | 143 | for(int i = 0; i < dl; ++i) 144 | { 145 | db(&image, data_buf[i]); 146 | } 147 | } else 148 | { 149 | linked_list_reversed_foreach(ctx->relocations, struct relocation*, it, 150 | { 151 | if(it->type == RELOC_CODE) 152 | { 153 | *(u32*)&instr[it->from] = it->to + (ORG + ALIGNMENT); 154 | printf("[CODE] relocating %d bytes from %02X to %02X\n", it->size, it->from, it->to + ORG); 155 | } else 156 | { 157 | printf("unknown relocation type %d\n", it->type); 158 | } 159 | }); 160 | for(int i = 0; i < il; ++i) 161 | { 162 | db(&image, instr[i]); 163 | } 164 | } 165 | 166 | size_t filesize = heap_string_size(&image); 167 | FILE* fp; 168 | std_fopen_s(&fp, binary_path, "wb"); 169 | if(!fp) 170 | { 171 | char errorMessage[1024]; 172 | 
std_strerror_s(errorMessage, sizeof(errorMessage), errno); 173 | printf("failed to open '%s', error = %s\n", binary_path, errorMessage); 174 | return 1; 175 | } 176 | fwrite(image, filesize, 1, fp); 177 | fclose(fp); 178 | 179 | heap_string_free(&image); 180 | return 0; 181 | } 182 | -------------------------------------------------------------------------------- /elf.h: -------------------------------------------------------------------------------- 1 | #ifndef ELF_H 2 | #define ELF_H 3 | 4 | #include "types.h" 5 | 6 | PACK(struct phdr64 7 | { 8 | u32 p_type; 9 | u32 p_flags; 10 | u64 p_offset; 11 | u64 p_vaddr; 12 | u64 p_paddr; 13 | u64 p_filesz; 14 | u64 p_memsz; 15 | u64 p_align; 16 | }); 17 | 18 | PACK(struct phdr32 19 | { 20 | i32 p_type; 21 | u32 p_offset; 22 | u32 p_vaddr; 23 | u32 p_paddr; 24 | u32 p_filesz; 25 | u32 p_memsz; 26 | i32 p_flags; 27 | u32 p_align; 28 | }); 29 | 30 | enum 31 | { 32 | PF_X = 0x1, 33 | PF_W = 0x2, 34 | PF_R = 0x4 35 | }; 36 | 37 | enum 38 | { 39 | PT_NULL = 0x0, 40 | PT_LOAD = 0x1, 41 | PT_DYNAMIC = 0x2, 42 | PT_INTERP = 0x3, 43 | PT_NOTE = 0x4, 44 | PT_SHLIB = 0x5, 45 | PT_PHDR = 0x6, 46 | PT_TLS = 0x7 47 | }; 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /elf64.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "compile.h" 7 | #include "rhd/std.h" 8 | #include "rhd/linked_list.h" 9 | #include "util.h" 10 | #include "elf.h" 11 | 12 | int build_elf64_image(compiler_t *ctx, const char *binary_path) 13 | { 14 | assert(sizeof(struct phdr64) == 0x38); 15 | 16 | heap_string instr = ctx->instr; 17 | #if 0 18 | instr = NULL; 19 | 20 | db(&instr, 0xcc); 21 | db(&instr, 0xcc); 22 | db(&instr, 0xcc); 23 | 24 | db(&instr, 0x48); 25 | db(&instr, 0xc7); 26 | db(&instr, 0xc0); 27 | db(&instr, 0x3c); 28 | db(&instr, 0x00); 29 | db(&instr, 0x00); 30 | db(&instr, 0x00); 31 | //syscall 32 
| db(&instr, 0x0f); 33 | db(&instr, 0x05); 34 | #endif 35 | 36 | heap_string data_buf = ctx->data; 37 | heap_string image = NULL; 38 | db(&image, 0x7f); 39 | db(&image, 'E'); 40 | db(&image, 'L'); 41 | db(&image, 'F'); 42 | db(&image, 2); //64 bits 43 | db(&image, 1); 44 | db(&image, 1); 45 | db(&image, 0); //System V 46 | 47 | for(size_t i = 0; i < 8; ++i) 48 | db(&image, 0); 49 | dw(&image, 2); //e_type 50 | dw(&image, 0x3e); //e_machine //AMD x86-64 //https://en.wikipedia.org/wiki/Executable_and_Linkable_Format 51 | dd(&image, 1); //e_version 52 | 53 | size_t entry_offset = heap_string_size(&image); 54 | dq(&image, 0); //we'll fill this in later //e_entry 55 | dq(&image, 0x40); //e_phoff 56 | 57 | dq(&image, 0); //e_shoff 58 | 59 | dd(&image, 0); //e_flags 60 | dw(&image, 0x34); //e_ehsize 61 | dw(&image, sizeof(struct phdr64)); //e_phentsize 62 | 63 | u32 num_program_headers = 2; 64 | if(ctx->data) 65 | ++num_program_headers; 66 | 67 | dw(&image, num_program_headers); //e_phnum //amount of program headers 68 | dw(&image, 0); //e_shentsize 69 | dw(&image, 0); //e_shnum 70 | dw(&image, 0); //e_shstrndx 71 | 72 | #define ORG (0x40000) 73 | #define ALIGNMENT (0x1000) 74 | 75 | int phdr_offset = heap_string_size(&image); 76 | 77 | int null_hdr_offset = heap_string_size(&image); 78 | pad(&image, sizeof(struct phdr64)); 79 | int text_hdr_offset = heap_string_size(&image); 80 | pad(&image, sizeof(struct phdr64)); 81 | 82 | int data_hdr_offset = heap_string_size(&image); 83 | if(ctx->data) 84 | pad(&image, sizeof(struct phdr64)); 85 | 86 | int phdr_end = heap_string_size(&image); 87 | 88 | //get pointers now because image pointer is different than before after the reallocation 89 | struct phdr64 *null_hdr = (struct phdr64*)&image[null_hdr_offset]; 90 | 91 | null_hdr->p_type = PT_NULL; 92 | null_hdr->p_flags = PF_R; 93 | null_hdr->p_offset = 0; 94 | null_hdr->p_vaddr = ORG; 95 | null_hdr->p_paddr = ORG; 96 | null_hdr->p_filesz = phdr_end; 97 | null_hdr->p_memsz = 
phdr_end; 98 | null_hdr->p_align = ALIGNMENT; 99 | 100 | u32 il = heap_string_size(&instr); 101 | 102 | size_t entry = heap_string_size(&image); 103 | *(uint32_t*)(image + entry_offset) = ORG + ALIGNMENT; 104 | 105 | pad_align(&image, ALIGNMENT); 106 | 107 | u32 code_offset = heap_string_size(&image); 108 | 109 | struct phdr64 *text_hdr = (struct phdr64*)&image[text_hdr_offset]; 110 | text_hdr->p_type = PT_LOAD; 111 | text_hdr->p_flags = PF_R | PF_X; 112 | text_hdr->p_offset = code_offset; 113 | text_hdr->p_vaddr = ORG + ALIGNMENT; 114 | text_hdr->p_paddr = ORG + ALIGNMENT; 115 | text_hdr->p_filesz = il; 116 | text_hdr->p_memsz = il; 117 | text_hdr->p_align = ALIGNMENT; 118 | 119 | //put .data section here 120 | if(ctx->data) 121 | { 122 | int vaddr = ORG + ALIGNMENT + il; 123 | vaddr += align_to(vaddr, ALIGNMENT); 124 | 125 | //relocate everything to vaddr 126 | //TODO: FIXME 64-bit addresses 127 | linked_list_reversed_foreach(ctx->relocations, struct relocation*, it, 128 | { 129 | if(it->type == RELOC_DATA) 130 | { 131 | *(u32*)&instr[it->from] = it->to + vaddr; 132 | printf("[DATA] relocating %d bytes from %02X to %02X\n", it->size, it->from, it->to + vaddr); 133 | } 134 | else if(it->type == RELOC_CODE) 135 | { 136 | *(u32*)&instr[it->from] = it->to + (ORG + ALIGNMENT); 137 | printf("[CODE] relocating %d bytes from %02X to %02X\n", it->size, it->from, it->to + ORG); 138 | } else 139 | { 140 | printf("unknown relocation type %d\n", it->type); 141 | } 142 | }); 143 | 144 | for(int i = 0; i < il; ++i) 145 | { 146 | db(&image, instr[i]); 147 | } 148 | 149 | pad_align(&image, ALIGNMENT); 150 | 151 | u32 data_offset = heap_string_size(&image); 152 | u32 dl = heap_string_size(&data_buf); 153 | 154 | struct phdr64 *data_hdr = (struct phdr64*)&image[data_hdr_offset]; 155 | data_hdr->p_type = PT_LOAD; 156 | data_hdr->p_flags = PF_R | PF_W; 157 | data_hdr->p_offset = data_offset; 158 | data_hdr->p_vaddr = vaddr; 159 | data_hdr->p_paddr = vaddr; 160 | data_hdr->p_filesz = 
dl; 161 | data_hdr->p_memsz = dl; 162 | data_hdr->p_align = ALIGNMENT; 163 | 164 | for(int i = 0; i < dl; ++i) 165 | { 166 | db(&image, data_buf[i]); 167 | } 168 | } else 169 | { 170 | #if 0 171 | linked_list_reversed_foreach(ctx->relocations, struct relocation*, it, 172 | { 173 | if(it->type == RELOC_CODE) 174 | { 175 | *(u32*)&instr[it->from] = it->to + (ORG + ALIGNMENT); 176 | printf("[CODE] relocating %d bytes from %02X to %02X\n", it->size, it->from, it->to + ORG); 177 | } else 178 | { 179 | printf("unknown relocation type %d\n", it->type); 180 | } 181 | }); 182 | for(int i = 0; i < il; ++i) 183 | { 184 | db(&image, instr[i]); 185 | } 186 | #endif 187 | printf("unhandled for now..\n"); 188 | getchar(); 189 | } 190 | 191 | size_t filesize = heap_string_size(&image); 192 | FILE* fp; 193 | std_fopen_s(&fp, binary_path, "wb"); 194 | if(!fp) 195 | { 196 | char errorMessage[1024]; 197 | std_strerror_s(errorMessage, sizeof(errorMessage), errno); 198 | printf("failed to open '%s', error = %s\n", binary_path, errorMessage); 199 | return 1; 200 | } 201 | fwrite(image, filesize, 1, fp); 202 | fclose(fp); 203 | 204 | heap_string_free(&image); 205 | return 0; 206 | } 207 | -------------------------------------------------------------------------------- /emu.c: -------------------------------------------------------------------------------- 1 | #include "imm.h" 2 | #include "instruction.h" 3 | #include "operand.h" 4 | 5 | static const char* x64_register_strings[] = { 6 | "AL", "BL", "CL", "DL", "AH", "BH", "CH", "DH", "AX", "BX", "CX", "DX", "EAX", 7 | "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI", "R8B", "R9B", "R10B", "R11B", "R12B", "R13B", 8 | "R14B", "R15B", "R8W", "R9W", "R10W", "R11W", "R12W", "R13W", "R14W", "R15W", "R8D", "R9D", "R10D", 9 | "R11D", "R12D", "R13D", "R14D", "R15D", "RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI", 10 | "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15", "XMM0", "XMM1", "XMM2", "XMM3", "XMM4", 11 | "XMM5", "XMM6", "XMM7", 
"YMM0", "YMM1", "YMM2", "YMM3", "YMM4", "YMM5", "YMM6", "YMM7", NULL}; 12 | 13 | typedef enum 14 | { 15 | AL, 16 | BL, 17 | CL, 18 | DL, // lower 8-bit registers 19 | AH, 20 | BH, 21 | CH, 22 | DH, // upper 8-bit registers 23 | AX, 24 | BX, 25 | CX, 26 | DX, // 16-bit registers 27 | EAX, 28 | ECX, 29 | EDX, 30 | EBX, 31 | ESP, 32 | EBP, 33 | ESI, 34 | EDI, // 32-bit registers 35 | R8B, 36 | R9B, 37 | R10B, 38 | R11B, 39 | R12B, 40 | R13B, 41 | R14B, 42 | R15B, // lowermost 8-bits register 43 | R8W, 44 | R9W, 45 | R10W, 46 | R11W, 47 | R12W, 48 | R13W, 49 | R14W, 50 | R15W, // lowermost 16-bits register 51 | R8D, 52 | R9D, 53 | R10D, 54 | R11D, 55 | R12D, 56 | R13D, 57 | R14D, 58 | R15D, // lowermost 32-bits register 59 | RAX, 60 | RCX, 61 | RDX, 62 | RBX, 63 | RSP, 64 | RBP, 65 | RSI, 66 | RDI, 67 | R8, 68 | R9, 69 | R10, 70 | R11, 71 | R12, 72 | R13, 73 | R14, 74 | R15, // 64-bit registers 75 | XMM0, 76 | XMM1, 77 | XMM2, 78 | XMM3, 79 | XMM4, 80 | XMM5, 81 | XMM6, 82 | XMM7, // SSE2 83 | YMM0, 84 | YMM1, 85 | YMM2, 86 | YMM3, 87 | YMM4, 88 | YMM5, 89 | YMM6, 90 | YMM7, // AVX 91 | X64_REGISTER_MAX 92 | } x64_register_t; 93 | 94 | // list of registers that overlap due to being lower N bits 95 | static const int x64_register_slots[][6] = {{RAX, EAX, AX, AL, AH, -1}, 96 | {RCX, ECX, CX, CL, CH, -1}, 97 | {RDX, EDX, DX, DL, DH, -1}, 98 | {RBX, EBX, BX, BL, BH, -1}, 99 | {RSP, ESP, -1}, 100 | {RBP, EBP, -1}, 101 | {RSI, ESI, -1}, 102 | {RDI, EDI, -1}, 103 | {R8, R8D, R8W, R8B, -1}, 104 | {R9, R9D, R9W, R9B, -1}, 105 | {R10, R10D, R10W, R10B, -1}, 106 | {R11, R11D, R11W, R11B, -1}, 107 | {R12, R12D, R12W, R12B, -1}, 108 | {R13, R13D, R13W, R13B, -1}, 109 | {R14, R14D, R14W, R14B, -1}, 110 | {R15, R15D, R15W, R15B, -1}, 111 | {XMM0, YMM0, -1}, 112 | {XMM1, YMM1, -1}, 113 | {XMM2, YMM2, -1}, 114 | {XMM3, YMM3, -1}, 115 | {XMM4, YMM4, -1}, 116 | {XMM5, YMM5, -1}, 117 | {XMM6, YMM6, -1}, 118 | {XMM7, YMM7, -1}}; 119 | 120 | static struct 121 | { 122 | int bits; 123 | 
x64_register_t registers[17]; 124 | } x64_register_bits[] = { 125 | {256, {YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, -1}}, 126 | {128, {XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, -1}}, 127 | {64, {RAX, RCX, RDX, RBX, /*RSP,RBP,*/ RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15, -1}}, 128 | {32, {EAX, ECX, EDX, EBX, /*ESP,EBP,*/ ESI, EDI, R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D, -1}}, 129 | {16, {AX, BX, CX, DX, R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W, -1}}, 130 | {8, {AL, BL, CL, DL, AH, BH, CH, DH, R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B, -1}}}; 131 | 132 | typedef enum 133 | { 134 | CF = 1, 135 | PF = 0x4, 136 | AF = 0x10, 137 | ZF = 0x40, 138 | SF = 0x80, 139 | TP = 0x100, 140 | IF = 0x200, 141 | DF = 0x400, 142 | OF = 0x800 143 | } x64_flags_t; 144 | 145 | static bool operand_size_native(voperand_t *o) 146 | { 147 | //in this case we're targetting x64, so either one is fine 148 | return o->size == VOPERAND_SIZE_NATIVE || o->size == VOPERAND_SIZE_64_BITS; 149 | } 150 | 151 | // instructions that default to 64-bit on long mode 152 | // call, enter, jcc, jrcxz, jmp (near), leave, lgdt, lidt, lldt, loop, loopcc, ltr, mov cr(n), mov dr(n), pop reg/mem, 153 | // pop reg pop fs, pop gs, popfq, push imm8, push imm32, push reg/mem, push reg, push fs, push gs, pushfq, ret (near) 154 | 155 | // W when 1 64-bit operand size is used 156 | // R modrm.reg field 157 | // X sib.index field 158 | // B modrm.rm field or sib.base field 159 | 160 | typedef struct 161 | { 162 | int W, R, X, B; 163 | } rex_fields_t; 164 | 165 | static u8 encode_rex_prefix(rex_fields_t rf) 166 | { 167 | rf.W = (rf.W != 0); 168 | rf.R = (rf.R != 0); 169 | rf.X = (rf.X != 0); 170 | rf.B = (rf.B != 0); 171 | // 0 7 172 | // B X R W 0 0 1 0 173 | // NOTE: assuming little-endian for now 174 | return 2 | (rf.W << 4) | (rf.R << 5) | (rf.X << 6) | (rf.B << 7); 175 | } 176 | 177 | // sets rex field W if needed and returns the value of the register used in the reg field of modrm 178 | 
// should be shifted to the reg position (x64 little-endian e.g << 3) 179 | // value depends on the instruction which register is used see 180 | // https://wiki.osdev.org/X86-64_Instruction_Encoding#Registers 181 | static u8 encode_register_reference(x64_register_t reg, 182 | rex_fields_t* rf) // can return max value of 15 (3 bits + extra rex field bit) = 4 183 | { 184 | if (reg >= R8 && reg <= R15) 185 | { 186 | rf->R = 1; 187 | return (reg - R8); 188 | } 189 | else if (reg >= RAX && reg <= RDI) 190 | return (reg - RAX); 191 | else if (reg >= EAX && reg <= EDI) 192 | return (reg - EAX); 193 | // TODO: add rest of registers 194 | return -1; 195 | } 196 | 197 | typedef struct 198 | { 199 | } x64_context_t; 200 | 201 | static void mov(x64_context_t* ctx, int dst, int src) 202 | { 203 | // TODO: first byte replace with 0x49 204 | // but just redo it all and just use bitflags 205 | // https://staffwww.fullcoll.edu/aclifton/cs241/lecture-instruction-format.html 206 | 207 | rex_fields_t rf = {0}; 208 | rf.W = (dst >= R8 && dst <= R15); 209 | rf.B = 0; 210 | rf.X = 0; 211 | rf.R = (src >= R8 && src <= R15); 212 | 213 | /* 214 | 7 0 215 | +---+---+---+---+---+---+---+---+ 216 | | mod | reg | rm | 217 | +---+---+---+---+---+---+---+---+ 218 | */ 219 | u8 modrm = 0; 220 | // for now just use disp32, which is b10 221 | 222 | // set first mod bit to 1 223 | modrm |= (1 << 7); 224 | 225 | // set dest reg 226 | modrm |= encode_register_reference(dst, &rf) << 3; 227 | 228 | u8 rex = encode_rex_prefix(rf); 229 | /* db(ctx, rex); */ 230 | /* db(ctx, 0x8b); // 8B r MOV r16/32/64 r/m16/32/64 */ 231 | /* db(ctx, modrm); */ 232 | /* dd(ctx, lv->offset); */ 233 | } 234 | 235 | static void add(voperand_t *dst, voperand_t *a, voperand_t *b) 236 | { 237 | 238 | } 239 | 240 | void handle_instruction(int opcode, vinstr_t *instr) 241 | { 242 | switch (opcode) 243 | { 244 | case VOP_ADD: 245 | { 246 | assert(instr->numoperands == 3); 247 | voperand_t* dst = &instr->operands[0]; 248 | 
//TODO: fix or,and,shl,shr for word size
// Swaps the two low-order bytes of i (16-bit host to network byte order).
// Bits above the low 16 are discarded.
int htons(int i)
{
	int low = i & 0xff;
	int high = (i >> 8) & 0xff;
	return (low << 8) | high;
}
//TODO: FIXME fix preprocessor add __FILE__, __LINE__ and stringify #
// Prints a message and calls int3() three times when expr is zero; does nothing otherwise.
void assert(int expr)
{
	if (expr == 0)
	{
		printf("expression failed\n");
		for (int i = 0; i < 3; ++i)
			int3();
	}
}
// Prints d in base 10 through print(), with a leading '-' for negative values.
void print_decimal(int d)
{
	char buf[32];
	int neg = d < 0;
	int pos = sizeof(buf) - 1;
	buf[pos] = 0;
	// The digits are taken from the signed remainder instead of negating d first,
	// because -d overflows for the most negative int.
	do
	{
		int m = d % 10;
		if(m < 0)
			m = -m;
		pos -= 1;
		buf[pos] = m + '0';
		d /= 10;
	} while(d != 0);
	if(neg)
	{
		pos -= 1;
		buf[pos] = '-';
	}
	print(&buf[pos]);
}
// Converts the leading decimal digits of _str to an int.
// An optional single leading '-' negates the result. Parsing stops at the first
// character that is not a digit, so "42abc" and "42\n" both give 42 and "" gives 0.
// Leading whitespace and '+' are not skipped.
int atoi(const char *_str)
{
	const char *p = _str;
	int neg = *p == '-';
	if(neg)
		++p;
	int total = 0;
	// the terminating 0 is below '0' and ends the loop as well
	while(*p >= '0')
	{
		if(*p > '9')
			break;
		total = total * 10 + (*p - '0');
		++p;
	}
	if(neg)
		return -total;
	return total;
}
// Compares two 0-terminated strings byte by byte.
// Returns 0 when they are equal, a negative value when a sorts before b and a
// positive value when a sorts after b (bytes are compared as values 0..255).
int strcmp(const char *a, const char *b)
{
	int i = 0;
	// stop at the end of a or at the first difference; when b is the shorter
	// string its terminating 0 is that difference
	while(a[i] != 0)
	{
		if(a[i] != b[i])
			break;
		i += 1;
	}
	return (a[i] & 0xff) - (b[i] & 0xff);
}
6 | 7 | // x64 8 | 9 | #define SYS_exit 60 10 | #define SYS_fork 57 11 | #define SYS_read 0 12 | #define SYS_write 1 13 | #define SYS_open 2 14 | #define SYS_close 3 15 | //#define SYS_waitpid doesn't exist anymore 16 | #define SYS_creat 85 17 | #define SYS_link 86 18 | #define SYS_unlink 87 19 | #define SYS_execve 59 20 | #define SYS_chdir 80 21 | #define SYS_time 201 22 | #define SYS_mknod 133 23 | #define SYS_chmod 90 24 | #define SYS_lchown 94 25 | 26 | #define SYS_brk 12 27 | #define SYS_nanosleep 35 28 | 29 | // x86 30 | 31 | //#define SYS_exit 1 32 | //#define SYS_fork 2 33 | //#define SYS_read 3 34 | //#define SYS_write 4 35 | //#define SYS_open 5 36 | //#define SYS_close 6 37 | //#define SYS_waitpid 7 38 | //#define SYS_creat 8 39 | //#define SYS_link 9 40 | //#define SYS_unlink 10 41 | //#define SYS_execve 11 42 | //#define SYS_chdir 12 43 | //#define SYS_time 13 44 | //#define SYS_mknod 14 45 | //#define SYS_chmod 15 46 | //#define SYS_lchown 16 47 | // 48 | //#define SYS_brk 0x2d 49 | //#define SYS_nanosleep 0xa2 50 | 51 | //TODO: add more syscalls nr 52 | #endif 53 | -------------------------------------------------------------------------------- /examples/include/time.h: -------------------------------------------------------------------------------- 1 | #ifndef TIME_H 2 | #define TIME_H 3 | 4 | #include 5 | 6 | int time(int timer) 7 | { 8 | return syscall( SYS_time, timer ); 9 | } 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /examples/include/unistd.h: -------------------------------------------------------------------------------- 1 | #ifndef UNISTD_H 2 | #define UNISTD_H 3 | 4 | int read(int fd, char *buf, int len) 5 | { 6 | return syscall(SYS_read, fd, buf, len); 7 | } 8 | 9 | void write(int fd, const char *buf, int len) 10 | { 11 | syscall(SYS_write, fd, buf, len); 12 | } 13 | 14 | int sleep(int sec) 15 | { 16 | int d[2]; 17 | d[0] = sec; 18 | d[1] = 0; 19 | 20 | int ret = 
syscall(SYS_nanosleep, d, 0); 21 | //TODO: handle EINTR 22 | if(ret < 0) 23 | { 24 | int errno = -ret; 25 | //printf( "sleep failed, ret = %x %d\n", errno, errno); 26 | return sec; 27 | } 28 | return 0; 29 | } 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /examples/infinite-loop-print-time-sleep.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main() 6 | { 7 | while(1) 8 | { 9 | printf("time = %d\n", time(0)); 10 | sleep( 1 ); 11 | } 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /examples/memory-ffi.c: -------------------------------------------------------------------------------- 1 | //ocean.exe -d -bmemory -lmsvcrt.dll -lkernel32.dll -luser32.dll test.c test.exe 2 | //last argument test.exe is unused atm, because we're running the code directly 3 | 4 | int time(int t); 5 | int printf(const char *format, ...); 6 | 7 | int MessageBoxA( 8 | void* hWnd, 9 | const char* lpText, 10 | const char* lpCaption, 11 | int uType 12 | ); 13 | 14 | int main() 15 | { 16 | MessageBoxA(0, "hello world", 0, 0); 17 | printf("hello world\n"); 18 | return time(0); 19 | } -------------------------------------------------------------------------------- /examples/printf.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() 4 | { 5 | printf("test %x %s\n", 123, "string"); 6 | return 0; 7 | } 8 | -------------------------------------------------------------------------------- /examples/ptr.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | 5 | void test(int *p) 6 | { 7 | *(char*)p = 1; 8 | } 9 | 10 | int main() 11 | { 12 | int a = 0xff << 8; 13 | printf("a = %d\n", a); 14 | test(&a); 15 | printf("a = %d\n", a); 16 | return 0; 17 | } 18 | 
// Opens "pre.c" and echoes its contents to stdout in chunks of up to
// sizeof(buf) bytes.
void test()
{
	char buf[32];
	int fd = open("pre.c", 0, 0);
	printf("fd = %x\n", fd);
	if(fd < 0) //nothing to read when the file could not be opened
		return;
	int n = 0;
	do
	{
		n = read(fd, buf, sizeof(buf));
		// printf("n = %d\n", n);
		// printf("buf = %s\n", buf);
		//only print the bytes this read actually delivered; a short (or failed)
		//read leaves the rest of buf stale or uninitialized
		for (int i = 0; i < n; ++i)
			putchar(buf[i]);
	} while (n == sizeof(buf)); //a short read means end of file (or an error)
}
/* An immediate value together with the width it was stored with. */
typedef struct
{
	int nbits; /* width of the stored value: 8, 16, 32 or 64 */
	union
	{
		int64_t dq; /* used when nbits == 64 */
		int32_t dd; /* used when nbits == 32 */
		int16_t dw; /* used when nbits == 16 */
		int8_t db;  /* used when nbits == 8 */
	};
	bool is_unsigned;
} imm_t;

/*
 * Generates imm_cast_<TYPE>(): reads the union member selected by nbits and
 * converts it to TYPE. Any other nbits value yields 0.
 */
#define DECLARE_IMM_CAST(TYPE)                  \
	static TYPE imm_cast_##TYPE(imm_t* imm)     \
	{                                           \
		const int width = imm->nbits;           \
		if (width == 8)                         \
			return (TYPE)imm->db;               \
		if (width == 16)                        \
			return (TYPE)imm->dw;               \
		if (width == 32)                        \
			return (TYPE)imm->dd;               \
		if (width == 64)                        \
			return (TYPE)imm->dq;               \
		return 0;                               \
	}

DECLARE_IMM_CAST(int8_t)
DECLARE_IMM_CAST(int16_t)
DECLARE_IMM_CAST(int32_t)
DECLARE_IMM_CAST(int64_t)
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "parse.h" 4 | #include "token.h" 5 | #include "rhd/heap_string.h" 6 | #include "rhd/linked_list.h" 7 | #include "types.h" 8 | 9 | struct lexer 10 | { 11 | const char *buf; 12 | int bufsz; 13 | int pos; 14 | struct token tk; 15 | int lineno; 16 | struct linked_list *tokens; 17 | int savepos; 18 | int flags; 19 | }; 20 | 21 | static int next(struct lexer *lex) 22 | { 23 | if(lex->pos + 1 > lex->bufsz) 24 | return -1; 25 | return lex->buf[lex->pos++]; 26 | } 27 | 28 | static void save(struct lexer *lex) 29 | { 30 | lex->savepos = lex->pos; 31 | } 32 | 33 | static void restore(struct lexer *lex) 34 | { 35 | lex->pos = lex->savepos; 36 | } 37 | 38 | static int next_check(struct lexer *lex, int check) 39 | { 40 | int pos = lex->pos; 41 | int n = next(lex); 42 | if(n != check) 43 | { 44 | lex->pos = pos; 45 | return 1; 46 | } 47 | return 0; 48 | } 49 | 50 | static heap_string next_number(struct lexer *lex, int *is_int) 51 | { 52 | *is_int = 1; 53 | //undo the fetch from before 54 | --lex->pos; 55 | 56 | heap_string s = NULL; 57 | while(1) 58 | { 59 | int ch = next(lex); 60 | if(ch == -1) 61 | { 62 | heap_string_free(&s); 63 | return NULL; 64 | } 65 | int valid = ( ch >= '0' && ch <= '9' ) || ch == '.' || ch == 'f'; 66 | if(!valid) 67 | { 68 | --lex->pos; 69 | break; 70 | } 71 | if(ch == 'f') 72 | { 73 | *is_int = 0; 74 | break; 75 | } 76 | if(ch == '.') 77 | { 78 | if(!*is_int) //can't have more than one . 
79 | { 80 | heap_string_free(&s); 81 | return NULL; 82 | } else 83 | *is_int = 0; 84 | } 85 | heap_string_push( &s, ch ); 86 | } 87 | return s; 88 | } 89 | 90 | static heap_string next_match_string(struct lexer *lex) 91 | { 92 | //undo the fetch from before 93 | --lex->pos; 94 | 95 | heap_string s = NULL; 96 | int bs = 0; 97 | while(1) 98 | { 99 | int ch = next(lex); 100 | if(ch == -1 || ch == '"') 101 | { 102 | --lex->pos; 103 | return s; 104 | } 105 | if(bs) 106 | { 107 | switch(ch) 108 | { 109 | case 'n': 110 | ch = '\n'; 111 | break; 112 | case 'r': 113 | ch = '\r'; 114 | break; 115 | case 't': 116 | ch = '\t'; 117 | break; 118 | case '\\': 119 | ch = '\\'; 120 | break; 121 | } 122 | bs = 0; 123 | } 124 | 125 | if(ch == '\\') 126 | bs = 1; 127 | else 128 | heap_string_push(&s, ch); 129 | } 130 | return s; 131 | } 132 | 133 | static heap_string next_match(struct lexer *lex, int (*cmp)(int)) 134 | { 135 | //undo the fetch from before 136 | --lex->pos; 137 | 138 | heap_string s = NULL; 139 | while(1) 140 | { 141 | int ch = next(lex); 142 | if(ch == -1 || !cmp(ch)) 143 | { 144 | --lex->pos; 145 | return s; 146 | } 147 | heap_string_push(&s, ch); 148 | } 149 | return s; 150 | } 151 | 152 | static int match_test_ident(int ch) 153 | { 154 | //Keep in mind this only works with numbers being non-first because there's a if before that checks for integers and this is called 155 | //in a if else, otherwise check if it's only on the first character. 
// Returns the numeric value (0..15) of the hexadecimal digit ch, or -1 when
// ch is not a hexadecimal digit. Letters past 'f'/'F' are rejected so that
// the hex literal loop stops at them instead of folding them into the value.
static int byte_value(int ch)
{
	if(ch >= '0' && ch <= '9')
		return ch - '0';
	if(ch >= 'a' && ch <= 'f')
		return ch - 'a' + 10;
	if(ch >= 'A' && ch <= 'F')
		return ch - 'A' + 10;
	return -1;
}
s = next_match_string(lex); 250 | snprintf(tk->string, sizeof(tk->string), "%s", s); 251 | heap_string_free(&s); 252 | if(next_check(lex, '"')) 253 | { 254 | //expected closing " 255 | return 1; 256 | } 257 | } break; 258 | case '/': 259 | if(!next_check(lex, '/')) 260 | { 261 | single_line_comment = 1; 262 | goto retry; 263 | } else if(!next_check(lex, '*')) 264 | { 265 | multiple_line_comment = 1; 266 | goto retry; 267 | } else if(!next_check(lex, '=')) 268 | { 269 | tk->type = TK_DIVIDE_ASSIGN; 270 | return 0; 271 | } 272 | break; 273 | case '*': 274 | if(!next_check(lex, '=')) 275 | { 276 | tk->type = TK_MULTIPLY_ASSIGN; 277 | return 0; 278 | } 279 | break; 280 | case '^': 281 | if(!next_check(lex, '=')) 282 | { 283 | tk->type = TK_XOR_ASSIGN; 284 | return 0; 285 | } 286 | break; 287 | case '-': 288 | if(!next_check(lex, '>')) 289 | { 290 | tk->type = TK_ARROW; 291 | return 0; 292 | } else if(!next_check(lex, '=')) 293 | { 294 | tk->type = TK_MINUS_ASSIGN; 295 | return 0; 296 | } else if(!next_check(lex, '-')) 297 | { 298 | tk->type = TK_MINUS_MINUS; 299 | return 0; 300 | } 301 | break; 302 | case '+': 303 | if(!next_check(lex, '=')) 304 | { 305 | tk->type = TK_PLUS_ASSIGN; 306 | return 0; 307 | } else if(!next_check(lex, '+')) 308 | { 309 | tk->type = TK_PLUS_PLUS; 310 | return 0; 311 | } 312 | break; 313 | case '=': 314 | if(!next_check(lex, '=')) 315 | { 316 | tk->type = TK_EQUAL; 317 | return 0; 318 | } 319 | break; 320 | case '|': 321 | if(!next_check(lex, '=')) 322 | { 323 | tk->type = TK_OR_ASSIGN; 324 | return 0; 325 | } 326 | break; 327 | case '%': 328 | if(!next_check(lex, '=')) 329 | { 330 | tk->type = TK_MOD_ASSIGN; 331 | return 0; 332 | } 333 | break; 334 | 335 | case '\'': 336 | { 337 | tk->type = TK_INTEGER; 338 | if(!next_check(lex, '"')) 339 | { 340 | perror("error: empty character constant\n"); 341 | return 1; 342 | } 343 | int character_constant = next(lex); 344 | if(character_constant == -1 || character_constant == 0) 345 | { 346 | 
printf("unexpected end of file\n"); 347 | return 1; 348 | } 349 | assert(character_constant > 0 && character_constant <= 0xff); 350 | tk->integer.value = character_constant; //TODO: add support for \0 \hex and other stuff 351 | if(next_check(lex, '\'')) 352 | { 353 | printf("expecting closing ' for character constant\n"); 354 | //expected closing " 355 | return 1; 356 | } 357 | } break; 358 | 359 | case '!': 360 | if(!next_check(lex, '=')) 361 | { 362 | tk->type = TK_NOT_EQUAL; 363 | return 0; 364 | } 365 | break; 366 | 367 | case '.': 368 | save(lex); 369 | if(next(lex) == '.' && next(lex) == '.') 370 | { 371 | tk->type = TK_DOT_THREE_TIMES; 372 | return 0; 373 | } else 374 | restore(lex); 375 | break; 376 | 377 | case '\\': 378 | if(lex->flags & LEX_FL_BACKSLASH_TOKEN) 379 | { 380 | tk->type = '\\'; 381 | return 0; 382 | } 383 | case '#': 384 | case '{': 385 | case '}': 386 | case '[': 387 | case ']': 388 | case '&': 389 | case '(': 390 | case '?': 391 | case ')': 392 | case ';': 393 | case ':': 394 | case ',': 395 | case '~': 396 | return 0; 397 | 398 | default: 399 | if(ch == '0' && !next_check(lex, 'x')) 400 | { 401 | tk->type = TK_INTEGER; 402 | tk->integer.is_unsigned = false; 403 | tk->integer.suffix = INTEGER_SUFFIX_NONE; 404 | tk->integer.value = 0; 405 | while(1) 406 | { 407 | int nch = next(lex); 408 | if(nch == -1) 409 | return 1; 410 | int bv = byte_value( nch ); 411 | if ( bv == -1 ) 412 | { 413 | --lex->pos; 414 | break; 415 | } 416 | tk->integer.value = (tk->integer.value << 4) | (bv & 0xf); 417 | } 418 | } else if(ch >= '0' && ch <= '9') 419 | { 420 | int is_int; 421 | heap_string s = next_number( lex, &is_int ); 422 | if(!s) //error 423 | return 1; 424 | if(is_int) 425 | { 426 | tk->type = TK_INTEGER; 427 | tk->integer.value = atoll( s ); 428 | } else 429 | { 430 | tk->scalar.suffix = SCALAR_SUFFIX_NONE; 431 | tk->type = TK_FLOAT; 432 | tk->scalar.value = atof( s ); 433 | } 434 | heap_string_free( &s ); 435 | } else if(match_test_ident(ch)) 436 | 
{ 437 | tk->type = TK_IDENT; 438 | heap_string s = next_match(lex, match_test_ident); 439 | if((lex->flags & LEX_FL_FORCE_IDENT) != LEX_FL_FORCE_IDENT) 440 | { 441 | // check whether this ident is a special ident 442 | if ( !strcmp( s, "for" ) ) 443 | tk->type = TK_FOR; 444 | else if ( !strcmp( s, "while" ) ) 445 | tk->type = TK_WHILE; 446 | else if ( !strcmp( s, "do" ) ) 447 | tk->type = TK_DO; 448 | else if ( !strcmp( s, "if" ) ) 449 | tk->type = TK_IF; 450 | else if ( !strcmp( s, "else" ) ) 451 | tk->type = TK_ELSE; 452 | else if ( !strcmp( s, "return" ) ) 453 | tk->type = TK_RETURN; 454 | else if ( !strcmp( s, "break" ) ) 455 | tk->type = TK_BREAK; 456 | else if ( !strcmp( s, "char" ) ) 457 | tk->type = TK_T_CHAR; 458 | else if ( !strcmp( s, "short" ) ) 459 | tk->type = TK_T_SHORT; 460 | else if ( !strcmp( s, "int" ) ) 461 | tk->type = TK_T_INT; 462 | else if ( !strcmp( s, "long" ) ) 463 | tk->type = TK_T_LONG; 464 | else if ( !strcmp( s, "float" ) ) 465 | tk->type = TK_T_FLOAT; 466 | else if ( !strcmp( s, "double" ) ) 467 | tk->type = TK_T_DOUBLE; 468 | else if ( !strcmp( s, "void" ) ) 469 | tk->type = TK_T_VOID; 470 | else if ( !strcmp( s, "const" ) ) 471 | tk->type = TK_CONST; 472 | else if ( !strcmp( s, "unsigned" ) ) 473 | tk->type = TK_T_UNSIGNED; 474 | else if ( !strcmp( s, "sizeof" ) ) 475 | tk->type = TK_SIZEOF; 476 | else if ( !strcmp( s, "__emit" ) ) 477 | tk->type = TK_EMIT; 478 | else if ( !strcmp( s, "struct" ) ) 479 | tk->type = TK_STRUCT; 480 | else if ( !strcmp( s, "union" ) ) 481 | tk->type = TK_UNION; 482 | else if ( !strcmp( s, "typedef" ) ) 483 | tk->type = TK_TYPEDEF; 484 | else if ( !strcmp( s, "enum" ) ) 485 | tk->type = TK_ENUM; 486 | } 487 | snprintf(tk->string, sizeof(tk->string), "%s", s); 488 | heap_string_free(&s); 489 | } else 490 | { 491 | tk->type = TK_INVALID; 492 | printf("got %c, unhandled error\n", ch); 493 | return 1; //error 494 | } 495 | break; 496 | } 497 | return 0; 498 | } 499 | 500 | void parse(const char *data, 
struct token **tokens_out/*must be free'd*/, int *num_tokens, int flags) 501 | { 502 | *tokens_out = NULL; 503 | *num_tokens = 0; 504 | 505 | int len = strlen(data); 506 | 507 | struct lexer lex = { 508 | .buf = data, 509 | .bufsz = strlen(data) + 1, 510 | .pos = 0, 511 | .lineno = 0, 512 | .tokens = NULL, 513 | .flags = flags 514 | }; 515 | 516 | lex.tokens = linked_list_create( struct token ); 517 | 518 | struct token tk = { 0 }; 519 | struct token *end = NULL; 520 | 521 | for ( int i = 0; i < len; ++i ) 522 | { 523 | tk.start = lex.pos; 524 | tk.character_start = lex.pos; 525 | int ret = token( &lex, &tk ); 526 | if ( ret ) 527 | { 528 | break; 529 | } 530 | tk.end = lex.pos; 531 | // if(tk.type == TK_IDENT) 532 | //printf("token = %s (%s)\n", token_type_to_string(tk.type), tk.string); 533 | end = linked_list_prepend( lex.tokens, tk ); 534 | (*num_tokens)++; 535 | } 536 | 537 | end->end = lex.pos; 538 | 539 | //allocate num_tokens 540 | struct token *tokens = malloc(sizeof(struct token) * *num_tokens); 541 | assert(tokens != NULL); 542 | 543 | int index = 0; 544 | linked_list_reversed_foreach(lex.tokens, struct token*, it, 545 | { 546 | if(it->type == TK_IDENT) 547 | ;//printf("]%s\n", it->string); 548 | 549 | memcpy(&tokens[index++], it, sizeof(struct token)); 550 | }); 551 | 552 | *tokens_out = tokens; 553 | 554 | linked_list_destroy(&lex.tokens); 555 | } 556 | -------------------------------------------------------------------------------- /main-ast.c: -------------------------------------------------------------------------------- 1 | #define HEAP_STRING_IMPL 2 | #include "rhd/heap_string.h" 3 | 4 | #define LINKED_LIST_IMPL 5 | #include "rhd/linked_list.h" 6 | 7 | #define HASH_MAP_IMPL 8 | #include "rhd/hash_map.h" 9 | 10 | #include "rhd/hash_string.h" 11 | 12 | #include "arena.h" 13 | #include "std.h" 14 | #include "token.h" 15 | #include "ast.h" 16 | #include "types.h" 17 | #include "parse.h" 18 | #include "compile.h" 19 | 20 | static void print_hex(u8 
*buf, size_t n) 21 | { 22 | for (int i = 0; i < n; ++i) 23 | { 24 | printf("%02X%s", buf[i] & 0xff, i + 1 == n ? "" : " "); 25 | } 26 | } 27 | 28 | int generate_ast(struct token* tokens, int num_tokens, struct linked_list** ll /*for freeing the whole tree*/, 29 | struct ast_node** root, bool); 30 | int main(int argc, char **argv) 31 | { 32 | assert(argc > 1); 33 | 34 | //Step 1. Preprocess file first. 35 | /* pre.c */ 36 | heap_string preprocess_file( const char* filename, const char** includepaths, int verbose, struct hash_map *defines, struct hash_map **defines_out); 37 | const char* includepaths[] = { "examples/include/", NULL }; 38 | heap_string data = preprocess_file( argv[1], includepaths, 0, NULL, NULL ); 39 | 40 | if ( !data ) 41 | { 42 | printf( "failed to read file '%s'\n", argv[1] ); 43 | return 1; 44 | } 45 | 46 | //Step 2. Tokenize the preprocessed result 47 | struct token* tokens = NULL; 48 | int num_tokens = 0; 49 | 50 | // printf("data = %s\n", data); 51 | parse( data , &tokens, &num_tokens, LEX_FL_NONE); 52 | 53 | 54 | //Optionally print out the tokens. 55 | /* char str[256]={0}; */ 56 | /* for(int i = 0; i < num_tokens; ++i) */ 57 | /* { */ 58 | /* struct token *tk = &tokens[i]; */ 59 | /* token_stringify(data, heap_string_size(&data), tk, str, sizeof(str)); */ 60 | /* printf("%s", str); */ 61 | /* } */ 62 | 63 | arena_t* arena; 64 | arena_create(&arena, "ast", 1000 * 1000 * 128); // 128MB 65 | 66 | ast_context_t ast_context; 67 | ast_init_context(&ast_context, arena); 68 | 69 | if(ast_process_tokens(&ast_context, tokens, num_tokens)) 70 | { 71 | /* print_ast(ast_context.program_node, 0); */ 72 | /* printf("done processing tokens\n"); */ 73 | } 74 | // gcc -w -g main-ast.c lex.c ast.c pre.c parse.c && gdb -ex run --args ./a.out examples/syscall.c 75 | 76 | /* struct linked_list *ast_list = NULL; */ 77 | /* struct ast_node *root = NULL; */ 78 | 79 | /* //Step 3. Generate AST from tokens. 
/*
 * Loads the dynamic library `lib_name` and prepends one dynlib_sym entry per
 * exported name to the list in *symbols.
 *
 * Returns 1 when the library could not be loaded, 0 otherwise. On platforms
 * other than Windows this does nothing and returns 0.
 */
int read_symbols_for_dynamic_library(const char *lib_name, struct linked_list **symbols)
{
	struct linked_list* sym_list = *symbols;
#ifdef _WIN32
	HMODULE lib = LoadLibraryA(lib_name); //if we just want to read the symbols without loading it into memory, then use the line below.
	//HMODULE lib = LoadLibraryExA(lib_name, NULL, DONT_RESOLVE_DLL_REFERENCES);
	if (!lib)
		return 1;
	//the module handle is the image base, every offset below is relative to it.
	//keep it as a pointer: squeezing it through an int truncates it on 64-bit
	BYTE* base = (BYTE*)lib;
	//assert(((PIMAGE_DOS_HEADER)lib)->e_magic == IMAGE_DOS_SIGNATURE);
	PIMAGE_NT_HEADERS header = (PIMAGE_NT_HEADERS)(base + ((PIMAGE_DOS_HEADER)lib)->e_lfanew);
	//assert(header->Signature == IMAGE_NT_SIGNATURE);
	//assert(header->OptionalHeader.NumberOfRvaAndSizes > 0);
	DWORD export_rva = header->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress;
	if (!export_rva)
		return 0; //the image exports nothing
	PIMAGE_EXPORT_DIRECTORY exports = (PIMAGE_EXPORT_DIRECTORY)(base + export_rva);
	//assert(exports->AddressOfNames != 0);
	//the name table holds 32-bit offsets from the image base, not pointers
	const DWORD* names = (const DWORD*)(base + exports->AddressOfNames);
	for (DWORD i = 0; i < exports->NumberOfNames; i++)
	{
		const char* export_name = (const char*)(base + names[i]);
		intptr_t loc = (intptr_t)GetProcAddress(lib, export_name);
		assert(loc);
		struct dynlib_sym sym = {
			.lib_name = lib_name,
			.sym_name = export_name,
			.offset = loc,
			.hash = hash_string(export_name)
		};
		linked_list_prepend(sym_list, sym);
		//printf("\tadded '%s' to import list\n", export_name);
	}
	*symbols = sym_list;
#endif
	return 0;
}
BT_LINUX_X64; 90 | struct linked_list* symbols = linked_list_create(struct dynlib_sym); 91 | size_t nsymbols = 0; 92 | 93 | #ifdef _WIN32 94 | //link some commonly used libraries by default 95 | //TODO: FIXME relocate this 96 | read_symbols_for_dynamic_library("msvcrt.dll", &symbols); 97 | read_symbols_for_dynamic_library("kernel32.dll", &symbols); 98 | read_symbols_for_dynamic_library("user32.dll", &symbols); 99 | read_symbols_for_dynamic_library("opengl32.dll", &symbols); 100 | #endif 101 | 102 | for ( int i = 1; i < argc; ++i ) 103 | { 104 | if ( argv[i][0] == '-' ) 105 | { 106 | switch ( argv[i][1] ) 107 | { 108 | case 'a': 109 | opt_flags |= OPT_AST; 110 | break; 111 | case 'i': 112 | //only print the asm instructions, don't write to file 113 | opt_flags |= OPT_INSTR; 114 | break; 115 | case 'd': 116 | //mainly used atm for inserting int3 breakpoints 117 | opt_flags |= OPT_DEBUG; 118 | break; 119 | case 'v': 120 | opt_flags |= OPT_VERBOSE; 121 | break; 122 | case 'b': 123 | { 124 | const char* build_target_str = (const char*)&argv[i][2]; 125 | if(opt_flags & OPT_VERBOSE) 126 | printf( "using build target: %s\n", build_target_str); 127 | if (!strcmp(build_target_str, "windows")) 128 | build_target = BT_WIN32; 129 | else if (!strcmp(build_target_str, "linux")) 130 | build_target = BT_LINUX_X64; 131 | else if (!strcmp(build_target_str, "memory")) 132 | build_target = BT_MEMORY; 133 | else if(!strcmp(build_target_str, "opcodes")) 134 | build_target = BT_OPCODES; 135 | if (opt_flags & OPT_VERBOSE) 136 | printf("build_target = %d\n", build_target); 137 | } 138 | break; 139 | case 'l': 140 | { 141 | const char* lib_name = (const char*)&argv[i][2]; 142 | read_symbols_for_dynamic_library(lib_name, &symbols); 143 | size_t nsymbols_old = nsymbols; 144 | linked_list_reversed_foreach(symbols, struct dynlib_sym*, it, 145 | { 146 | //printf("\tsym: %s\n", it->sym_name); 147 | ++nsymbols; 148 | }); 149 | if (opt_flags & OPT_VERBOSE) 150 | printf("linking against '%s', found 
%d symbols.\n", lib_name, nsymbols - nsymbols_old); 151 | } 152 | break; 153 | } 154 | } 155 | else 156 | { 157 | if (opt_flags & OPT_VERBOSE) 158 | printf("adding %s\n", argv[i]); 159 | files[numfiles++] = argv[i]; 160 | } 161 | } 162 | //TODO: multiple source files 163 | const char* src = files[numfiles > 1 ? numfiles - 2 : numfiles - 1]; 164 | const char* dst = NULL; 165 | if(build_target != BT_MEMORY) 166 | dst = files[numfiles - 1]; 167 | if (src == dst) 168 | dst = "a.out"; 169 | if (opt_flags & OPT_VERBOSE) 170 | printf("src: %s, dst: %s\n", src, dst); 171 | 172 | /* pre.c */ 173 | heap_string preprocess_file( const char* filename, const char** includepaths, int verbose, struct hash_map *defines, struct hash_map **defines_out); 174 | const char* includepaths[] = { "examples/include/", NULL }; 175 | heap_string data = preprocess_file( src, includepaths, 0, NULL, NULL ); 176 | 177 | if ( !data ) 178 | { 179 | printf( "failed to read file '%s'\n", src ); 180 | return 1; 181 | } 182 | 183 | struct token *tokens = NULL; 184 | int num_tokens = 0; 185 | 186 | // printf("data = %s\n", data); 187 | parse( data , &tokens, &num_tokens, LEX_FL_NONE); 188 | heap_string_free( &data ); 189 | 190 | //printf("num_tokens = %d\n", num_tokens); 191 | char str[256]={0}; 192 | for(int i = 0; i < num_tokens; ++i) 193 | { 194 | struct token *tk = &tokens[i]; 195 | token_to_string(tk, str, sizeof(str)); 196 | //printf("token %s\n", str); 197 | } 198 | 199 | struct linked_list *ast_list = NULL; 200 | struct ast_node *root = NULL; 201 | compiler_t ctx = { 0 }; 202 | ctx.build_target = build_target; 203 | ctx.find_import_fn = find_lib_symbol; 204 | ctx.find_import_fn_userptr = symbols; 205 | int ast = generate_ast(tokens, num_tokens, &ast_list, &root, opt_flags & OPT_AST); 206 | if(!ast && (opt_flags & OPT_AST) != OPT_AST) 207 | { 208 | // generate native code 209 | heap_string data_buf = NULL; 210 | 211 | static codegen_t cg; 212 | codegen_x64(&cg); 213 | 214 | int compile_status = 
compile_ast( root, &ctx, &cg ); 215 | if ( !compile_status ) 216 | { 217 | if ( (opt_flags & OPT_INSTR) != OPT_INSTR ) 218 | { 219 | int build_elf_image( compiler_t * ctx, const char* binary_path ); 220 | int build_elf64_image( compiler_t * ctx, const char* binary_path ); 221 | int build_exe_image( compiler_t * ctx, const char* binary_path ); 222 | int build_memory_image( compiler_t * ctx, const char* binary_path ); 223 | int ret; 224 | switch (build_target) 225 | { 226 | case BT_WIN32: 227 | ret = build_exe_image(&ctx, dst); 228 | break; 229 | case BT_LINUX_X86: 230 | ret = build_elf_image(&ctx, dst); 231 | break; 232 | case BT_LINUX_X64: 233 | ret = build_elf64_image(&ctx, dst); 234 | break; 235 | case BT_MEMORY: 236 | ret = build_memory_image(&ctx, dst); 237 | break; 238 | case BT_OPCODES: 239 | { 240 | heap_string instr = ctx.instr; 241 | int n = heap_string_size(&instr); 242 | linked_list_reversed_foreach(ctx.relocations, struct relocation*, it, 243 | { 244 | if(it->type == RELOC_DATA) 245 | { 246 | *(u32*)&instr[it->from] = it->to + n; 247 | } 248 | else if(it->type == RELOC_CODE) 249 | { 250 | *(u32*)&instr[it->from] = it->to; 251 | } else 252 | { 253 | printf("unknown relocation type %d\n", it->type); 254 | exit(1); 255 | } 256 | }); 257 | 258 | for(int i = 0; i < n; ++i) 259 | { 260 | printf("%02X%s", instr[i] & 0xff, i + 1 == n ? "" : " "); 261 | } 262 | putchar(' '); 263 | heap_string data_buf = ctx.data; 264 | size_t dl = heap_string_size(&data_buf); 265 | for(int i = 0; i < dl; ++i) 266 | { 267 | printf("%02X%s", data_buf[i] & 0xff, i + 1 == dl ? 
"" : " "); 268 | } 269 | } break; 270 | } 271 | if (opt_flags & OPT_VERBOSE) 272 | printf( "building image '%s' (return code = %d)\n", dst, ret ); 273 | } else 274 | { 275 | for ( int i = 0; i < heap_string_size( &ctx.instr ); ++i ) 276 | printf( "%02X ", ctx.instr[i] & 0xff ); 277 | putchar('\n'); 278 | } 279 | heap_string_free( &ctx.data ); 280 | heap_string_free( &ctx.instr ); 281 | linked_list_destroy( &ctx.relocations ); 282 | } 283 | heap_string_free( &data_buf ); 284 | 285 | root = NULL; 286 | linked_list_destroy(&ast_list); 287 | } 288 | free(tokens); 289 | //getchar(); 290 | return 0; 291 | } 292 | -------------------------------------------------------------------------------- /memory.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "compile.h" 7 | #include "rhd/linked_list.h" 8 | #include "util.h" 9 | #include "std.h" 10 | 11 | #ifdef _WIN32 12 | #define WIN32_LEAN_AND_MEAN 13 | #include 14 | #endif 15 | 16 | extern int opt_flags; 17 | 18 | int build_memory_image(compiler_t *ctx, const char *binary_path) 19 | { 20 | heap_string instr = ctx->instr; 21 | heap_string data_buf = ctx->data; 22 | 23 | #ifdef _WIN32 24 | 25 | //TODO: FIXME change hardcoded x86 to x64 or other arch later 26 | #define ALIGNMENT (0x1000) 27 | 28 | //TODO: allocate N bytes of size instr and align it to page size 29 | //TODO: make seperate data buffer and make it non-executable 30 | 31 | SYSTEM_INFO si; 32 | GetSystemInfo(&si); 33 | 34 | u32 il = heap_string_size(&instr); 35 | size_t dl = heap_string_size(&data_buf); 36 | size_t page_size = si.dwPageSize; 37 | size_t sztotal = il + dl; 38 | char* buffer = VirtualAlloc(NULL, sztotal, MEM_COMMIT, PAGE_READWRITE); 39 | memcpy(buffer, instr, il); 40 | intptr_t code_offs = (intptr_t)buffer; 41 | intptr_t data_offs = 0; 42 | if (dl > 0) 43 | { 44 | memcpy(&buffer[il], data_buf, dl); 45 | data_offs = code_offs + il; 46 | } 47 | 48 | 49 | 
linked_list_reversed_foreach(ctx->relocations, struct relocation*, it, 50 | { 51 | if (it->type == RELOC_DATA) 52 | { 53 | *(u32*)&buffer[it->from] = it->to + data_offs; 54 | if (opt_flags & OPT_VERBOSE) 55 | printf("[DATA] relocating %d bytes from %02X to %02X\n", it->size, it->from, it->to + data_offs); 56 | } 57 | else if (it->type == RELOC_CODE) 58 | { 59 | *(u32*)&buffer[it->from] = it->to + code_offs; 60 | if (opt_flags & OPT_VERBOSE) 61 | printf("[CODE] relocating %d bytes from %02X to %02X\n", it->size, it->from, it->to + code_offs); 62 | } 63 | else if (it->type == RELOC_IMPORT) 64 | { 65 | struct dynlib_sym* sym = (struct dynlib_sym*)it->to; 66 | intptr_t realcodepos = code_offs + it->from; 67 | *(u32*)&buffer[it->from] = realcodepos + 6; 68 | *(u32*)&buffer[it->from + 6] = sym->offset; 69 | if (opt_flags & OPT_VERBOSE) 70 | printf("[IMPORT] relocating %d bytes\n", it->size); 71 | } 72 | else 73 | { 74 | printf("unknown relocation type %d\n", it->type); 75 | } 76 | }); 77 | DWORD dummy; 78 | VirtualProtect(buffer, sztotal, PAGE_EXECUTE_READ, &dummy); 79 | int (__cdecl *fn)(void) = (int(__cdecl *)(void))buffer; 80 | int result = fn(); 81 | if (opt_flags & OPT_VERBOSE) 82 | printf("result: %d\n", result); 83 | VirtualFree(buffer, 0, MEM_RELEASE); 84 | #else 85 | printf("memory build target is unsupported on this platform.\n"); 86 | exit(1); 87 | #endif 88 | return 0; 89 | } 90 | -------------------------------------------------------------------------------- /operand.h: -------------------------------------------------------------------------------- 1 | #ifndef OPERAND_H 2 | #define OPERAND_H 3 | #include "imm.h" 4 | #include "register.h" 5 | #include 6 | 7 | typedef enum 8 | { 9 | VOPERAND_INVALID, 10 | VOPERAND_IMMEDIATE, 11 | VOPERAND_INDIRECT, 12 | VOPERAND_REGISTER, 13 | VOPERAND_INDIRECT_REGISTER, 14 | VOPERAND_INDIRECT_REGISTER_DISPLACEMENT, 15 | VOPERAND_INDIRECT_REGISTER_INDEXED, 16 | // for dst only register and address are valid 17 | 
/*
 * Printable names for voperand_type_t, indexed by the enum value.
 * There is one entry per enumerator (VOPERAND_INVALID .. VOPERAND_LABEL),
 * followed by a NULL terminator.
 */
static const char *voperand_type_strings[] = {
	"invalid",                        /* VOPERAND_INVALID */
	"immediate",                      /* VOPERAND_IMMEDIATE */
	"indirect",                       /* VOPERAND_INDIRECT */
	"register",                       /* VOPERAND_REGISTER */
	"indirect register",              /* VOPERAND_INDIRECT_REGISTER */
	"indirect register displacement", /* VOPERAND_INDIRECT_REGISTER_DISPLACEMENT */
	"register indexed",               /* VOPERAND_INDIRECT_REGISTER_INDEXED */
	"label",                          /* VOPERAND_LABEL (was missing, lookups hit the terminator) */
	NULL
};
voperand_size_t size) 83 | { 84 | voperand_t op = {.type = VOPERAND_INDIRECT_REGISTER_INDEXED, .size = size, .virtual = true}; 85 | op.reg_indirect_indexed.indexed_reg = indexreg; 86 | op.reg_indirect_indexed.reg = reg; 87 | op.reg_indirect_indexed.scale = scale; 88 | return op; 89 | } 90 | 91 | static voperand_t label_operand(size_t label_value) 92 | { 93 | voperand_t op = {.type = VOPERAND_LABEL, .size = VOPERAND_SIZE_NATIVE, .virtual = true}; 94 | op.label = label_value; 95 | return op; 96 | } 97 | 98 | static voperand_t imm32_operand(i32 i) 99 | { 100 | voperand_t op = {.type = VOPERAND_IMMEDIATE, .size = VOPERAND_SIZE_32_BITS, .virtual = true}; 101 | op.imm.nbits = 32; 102 | op.imm.dd = i; 103 | return op; 104 | } 105 | 106 | static voperand_t imm64_operand(i64 i) 107 | { 108 | voperand_t op = {.type = VOPERAND_IMMEDIATE, .size = VOPERAND_SIZE_64_BITS, .virtual = true}; 109 | op.imm.nbits = 64; 110 | op.imm.dq = i; 111 | return op; 112 | } 113 | 114 | static voperand_t invalid_operand() 115 | { 116 | voperand_t op = {.type = VOPERAND_INVALID, .virtual = true}; 117 | return op; 118 | } 119 | 120 | static voperand_t register_operand(vregister_t reg) 121 | { 122 | voperand_t op = {.type = VOPERAND_REGISTER, .size = VOPERAND_SIZE_NATIVE, .virtual = true}; 123 | op.reg = reg; 124 | return op; 125 | } 126 | 127 | static bool voperand_type_equal(voperand_t* a, voperand_t* b) 128 | { 129 | if (a->type == VOPERAND_REGISTER && b->type == VOPERAND_REGISTER && a->size == b->size) 130 | return true; 131 | return !memcmp(a, b, sizeof(voperand_t)); 132 | } 133 | 134 | #endif 135 | -------------------------------------------------------------------------------- /parse.c: -------------------------------------------------------------------------------- 1 | #include "parse.h" 2 | #include "std.h" 3 | #include "token.h" 4 | 5 | struct token* parse_token(struct parse_context* ctx) 6 | { 7 | if (ctx->token_index >= ctx->num_tokens) 8 | return NULL; 9 | ctx->current_token = 
&ctx->tokens[ctx->token_index]; 10 | return ctx->current_token; 11 | } 12 | 13 | struct token* parse_advance(struct parse_context* ctx) 14 | { 15 | struct token* t = parse_token(ctx); 16 | ++ctx->token_index; 17 | return t; 18 | } 19 | 20 | void parse_initialize(struct parse_context* ctx) 21 | { 22 | ctx->current_token = NULL; 23 | ctx->num_tokens = 0; 24 | ctx->token_index = 0; 25 | ctx->tokens = NULL; 26 | } 27 | 28 | int parse_string(struct parse_context* ctx, const char* str, int flags) 29 | { 30 | // TODO: handle errors 31 | parse(str, &ctx->tokens, &ctx->num_tokens, flags); 32 | return 0; 33 | } 34 | 35 | void parse_cleanup(struct parse_context* ctx) 36 | { 37 | free(ctx->tokens); 38 | } 39 | 40 | int parse_accept(struct parse_context* ctx, int type) 41 | { 42 | struct token* old_token = ctx->current_token; 43 | struct token* tk = parse_token(ctx); 44 | 45 | if (!tk || tk->type != type) 46 | { 47 | ctx->current_token = old_token; 48 | // debug_printf("tk->type %s (%d) != type %s (%d)\n", token_type_to_string(tk->type), tk->type, 49 | // token_type_to_string(type), type); 50 | return 1; 51 | } 52 | ++ctx->token_index; 53 | return 0; 54 | } 55 | -------------------------------------------------------------------------------- /parse.h: -------------------------------------------------------------------------------- 1 | #ifndef PARSE_H 2 | #define PARSE_H 3 | #include 4 | 5 | struct parse_context 6 | { 7 | struct token *tokens; 8 | int num_tokens; 9 | int token_index; 10 | struct token *current_token; 11 | 12 | jmp_buf jmp; 13 | }; 14 | 15 | enum LEX_FLAG 16 | { 17 | LEX_FL_NONE = 0, 18 | LEX_FL_NEWLINE_TOKEN = 1, 19 | LEX_FL_BACKSLASH_TOKEN = 2, 20 | LEX_FL_FORCE_IDENT = 4 21 | //LEX_FL_PREPROCESSOR_MODE = 4 //maybe 22 | }; 23 | 24 | void parse(const char*, struct token**, int*, int); 25 | int parse_accept( struct parse_context* ctx, int type ); 26 | struct token* parse_token( struct parse_context* ctx ); 27 | void parse_initialize( struct parse_context* ctx ); 
/*
 * Rewind the token cursor to the first token.
 * Only token_index is reset; current_token is left untouched.
 */
static void parse_reset( struct parse_context* ctx )
{
	ctx->token_index = 0;
}
u16 magic; //0x10b 66 | u8 major_linker_version; //0xe 67 | u8 minor_linker_version; //0x1d 68 | u32 size_of_code; //0x200 69 | u32 size_of_initialized_data; //0x400 70 | u32 size_of_uninitialized_Data; //0x0 71 | u32 address_of_entry_point; //0x1000 72 | u32 base_of_code; //0x1000 73 | u32 base_of_data; //0x2000 74 | u32 image_base; //0x400000 75 | u32 section_alignment; //0x1000 76 | u32 file_alignment; //0x200 77 | u16 major_operating_system_version; //0x4 78 | u16 minor_operating_system_version; //0x0 79 | u16 major_image_version; //0x0 80 | u16 minor_image_version; //0x0 81 | u16 major_subsystem_version; //0x4 82 | u16 minor_subsystem_version; //0x0 83 | u32 win32_version_value; //0x0 84 | u32 size_of_image; //0x4000 85 | u32 size_of_headers; //0x400 86 | u32 checksum; //0x0 87 | u16 subsystem; //IMAGE_SUBSYSTEM_WINDOWS_CUI 88 | u16 dll_characteristics; //IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE | IMAGE_DLLCHARACTERISTICS_NO_SEH | IMAGE_DLLCHARACTERISTICS_NX_COMPAT | IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE 89 | u32 size_of_stack_reserve; //0x100000 90 | u32 size_of_stack_commit; //0x1000 91 | u32 size_of_heap_reserve; //0x100000 92 | u32 size_of_heap_commit; //0x1000 93 | u32 loader_flags; //0x0 94 | u32 number_of_rva_and_sizes; //0x10 95 | struct data_dir dir[16]; 96 | }); 97 | 98 | #define IMAGE_SCN_MEM_EXECUTE ( 0x20000000 ) 99 | #define IMAGE_SCN_MEM_READ ( 0x40000000 ) 100 | #define IMAGE_SCN_CNT_CODE ( 0x00000020 ) 101 | 102 | PACK(struct section_hdr 103 | { 104 | char name[8]; 105 | u32 virtual_size; 106 | u32 virtual_address; 107 | u32 size_of_raw_data; 108 | u32 pointer_to_raw_data; 109 | u32 pointer_to_relocations; 110 | u32 pointer_to_linenumbers; 111 | u16 number_of_relocations; 112 | u16 number_of_linenumbers; 113 | u32 characteristics; 114 | }); 115 | 116 | int build_exe_image(compiler_t *ctx, const char *binary_path) 117 | { 118 | heap_string instr = ctx->instr; 119 | heap_string data_buf = ctx->data; 120 | 121 | #define ORG (0x400000) 122 | 
#define ALIGNMENT (0x1000) 123 | 124 | struct pe_hdr pe = { 0 }; 125 | 126 | pe.sig[0] = 'P'; 127 | pe.sig[1] = 'E'; 128 | pe.sig[2] = '\0'; 129 | pe.sig[3] = '\0'; 130 | 131 | pe.machine = IMAGE_FILE_MACHINE_I386; 132 | pe.numsections = 1; 133 | pe.timestamp = (int)time( 0 ); 134 | pe.size_of_optional_header = 0xe0; 135 | pe.characteristics = IMAGE_FILE_32BIT_MACHINE | IMAGE_FILE_EXECUTABLE_IMAGE; 136 | 137 | struct opt_hdr opt = { 0 }; 138 | opt.magic = 0x10b; 139 | opt.major_linker_version = 0xe; 140 | opt.minor_linker_version = 0x1d; 141 | opt.size_of_code = 0x200; 142 | opt.size_of_initialized_data = 0x400; 143 | opt.size_of_initialized_data = 0x0; 144 | opt.address_of_entry_point = 0x1000; 145 | opt.base_of_code = 0x1000; 146 | opt.base_of_data = 0x2000; 147 | opt.image_base = ORG; 148 | opt.section_alignment = ALIGNMENT; 149 | opt.file_alignment = 0x200; 150 | opt.major_operating_system_version = 0x4; // or 0x6 151 | opt.minor_operating_system_version = 0x0; 152 | opt.major_image_version = 0x4; 153 | opt.minor_image_version = 0x0; 154 | opt.major_subsystem_version = 0x4; 155 | opt.minor_subsystem_version = 0x0; 156 | opt.win32_version_value = 0x0; 157 | opt.size_of_image = 0x4000; 158 | opt.size_of_headers = 0x400; 159 | opt.checksum = 0x0; 160 | opt.subsystem = IMAGE_SUBSYSTEM_WINDOWS_CUI; 161 | opt.dll_characteristics = IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE | IMAGE_DLLCHARACTERISTICS_NO_SEH | 162 | IMAGE_DLLCHARACTERISTICS_NX_COMPAT | IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE; 163 | opt.size_of_stack_reserve = 0x100000; 164 | opt.size_of_stack_commit = 0x1000; 165 | opt.size_of_heap_reserve = 0x100000; 166 | opt.size_of_heap_reserve = 0x1000; 167 | opt.loader_flags = 0x0; 168 | opt.number_of_rva_and_sizes = 0x10; 169 | 170 | //opt.dir[DDT_IMPORT].rva = 0x20e0; 171 | //opt.dir[DDT_IMPORT].sz = 40; 172 | 173 | 174 | heap_string image = NULL; 175 | 176 | db(&image, 'M'); 177 | db(&image, 'Z'); 178 | pad(&image, 0x3c - 2); 179 | 180 | dd(&image, 64); //pe 
offset 181 | 182 | buf(&image, (const char*)&pe, sizeof(pe)); 183 | buf(&image, (const char*)&opt, sizeof(opt)); 184 | 185 | struct section_hdr section = { 0 }; 186 | snprintf( section.name, sizeof( section.name ), ".text" ); 187 | section.virtual_size = 4; //instr size 188 | section.virtual_address = 0x1000; 189 | section.size_of_raw_data = 4; 190 | section.pointer_to_raw_data = 512; 191 | section.characteristics = IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_CNT_CODE; 192 | buf(&image, (const char*)§ion, sizeof(section)); 193 | 194 | pad_align(&image, 0x200); 195 | db(&image, 0x6a); 196 | db(&image, 0x7f); 197 | db(&image, 0x58); 198 | db(&image, 0xc3); 199 | printf("pos = %d,%02X\n",heap_string_size(&image),heap_string_size(&image)); 200 | 201 | int filesize = heap_string_size(&image); 202 | FILE* fp; 203 | std_fopen_s(&fp, binary_path, "wb"); 204 | if(!fp) 205 | { 206 | char errorMessage[1024]; 207 | std_strerror_s(errorMessage, sizeof(errorMessage), errno); 208 | printf("failed to open '%s', error = %s\n", binary_path, errorMessage); 209 | return 1; 210 | } 211 | fwrite(image, filesize, 1, fp); 212 | fclose(fp); 213 | 214 | heap_string_free(&image); 215 | return 0; 216 | } 217 | -------------------------------------------------------------------------------- /pre.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "parse.h" 6 | #include "token.h" 7 | 8 | #ifdef STANDALONE 9 | #define HEAP_STRING_IMPL 10 | #define LINKED_LIST_IMPL 11 | #define HASH_MAP_IMPL 12 | #endif 13 | 14 | #include "rhd/hash_map.h" 15 | #include "rhd/heap_string.h" 16 | #include "rhd/linked_list.h" 17 | 18 | // TODO: recursively including files, does not update filepath of the filename 19 | 20 | // TODO: change default include path to be more dynamic 21 | #define INCLUDE_PATH "examples/include/" 22 | 23 | struct include_directive 24 | { 25 | heap_string path; 26 | int start, end; 27 | }; 28 | 
29 | struct define_directive 30 | { 31 | heap_string identifier; 32 | int function; 33 | heap_string parameters[32]; // TODO: increase amount? 34 | int numparameters; 35 | heap_string body; 36 | }; 37 | 38 | struct pre_context 39 | { 40 | struct parse_context parse_context; 41 | struct linked_list* includes; 42 | const char** includepaths; 43 | struct hash_map* identifiers; 44 | jmp_buf jmp; 45 | const char* sourcedir; 46 | heap_string data; 47 | int verbose; 48 | int scope_bit; 49 | int scope_visibility; 50 | }; 51 | 52 | static int pre_accept(struct pre_context* ctx, int type) 53 | { 54 | return parse_accept(&ctx->parse_context, type); 55 | } 56 | 57 | static struct token* pre_token(struct pre_context* ctx) 58 | { 59 | return ctx->parse_context.current_token; 60 | } 61 | 62 | static heap_string concatenate(const char* a, const char* b) 63 | { 64 | if (!a) 65 | a = ""; 66 | heap_string c = heap_string_alloc(strlen(a) + strlen(b) + 1); 67 | heap_string_appendf(&c, "%s%s", a, b); 68 | return c; 69 | } 70 | 71 | static heap_string filepath(const char* filename) 72 | { 73 | assert(filename); 74 | heap_string fp = heap_string_new(filename); 75 | int sz = heap_string_size(&fp); 76 | for (int i = 0; i < sz; ++i) 77 | { 78 | if (fp[sz - i - 1] == '/') // path seperator 79 | break; 80 | fp[sz - i - 1] = 0; 81 | } 82 | return fp; 83 | } 84 | 85 | static void pre_expect(struct pre_context* ctx, int type) 86 | { 87 | if (!pre_accept(ctx, type)) 88 | return; 89 | printf("preprocessor error: expected token '%s', got '%s'\n", token_type_to_string(type), 90 | token_type_to_string(parse_token(&ctx->parse_context)->type)); 91 | 92 | longjmp(ctx->jmp, 1); 93 | } 94 | 95 | static void pre_error(struct pre_context* ctx, const char* msg) 96 | { 97 | printf("preprocess error: %s\n", msg); 98 | longjmp(ctx->jmp, 1); 99 | } 100 | 101 | static void append_token_buffer(struct pre_context* ctx, heap_string* str, struct token* tk) 102 | { 103 | int len = tk->end - tk->start; 104 | for (int i 
= 0; i < len; ++i) 105 | heap_string_push(str, ctx->data[tk->start + i]); 106 | } 107 | 108 | static const char* pre_string(struct pre_context* ctx) 109 | { 110 | if (!pre_token(ctx)) 111 | return ""; 112 | return pre_token(ctx)->string; 113 | } 114 | 115 | static struct define_directive* find_identifier(struct pre_context* ctx, const char* ident) 116 | { 117 | return hash_map_find(ctx->identifiers, ident); 118 | } 119 | 120 | int file_exists(const char* filename) 121 | { 122 | struct stat buffer; 123 | return (stat(filename, &buffer) == 0); 124 | } 125 | 126 | heap_string locate_include_file(struct pre_context* ctx, const char* includepath) 127 | { 128 | heap_string path = concatenate(ctx->sourcedir, includepath); 129 | if (file_exists(path)) 130 | return path; 131 | heap_string_free(&path); 132 | 133 | for (const char** it = ctx->includepaths; *it; ++it) 134 | { 135 | path = concatenate(*it, includepath); 136 | if (file_exists(path)) 137 | return path; 138 | heap_string_free(&path); 139 | } 140 | return NULL; 141 | } 142 | 143 | static void handle_define_ident(struct pre_context* ctx, struct define_directive* d, heap_string* preprocessed) 144 | { 145 | if (!pre_accept(ctx, '(')) 146 | { 147 | assert(d->function); 148 | 149 | int nargs = 0; 150 | struct token* args[16]; 151 | 152 | do 153 | { 154 | struct token* tk = parse_advance(&ctx->parse_context); 155 | if (!tk || tk->type == TK_EOF) 156 | return; 157 | 158 | if (tk->type != TK_INTEGER && tk->type != TK_STRING && tk->type != TK_IDENT) 159 | { 160 | // TODO: FIXME handle actual expressions and nested macro functions 161 | // for now this is good enough for my usecase 162 | pre_error(ctx, "expected string, ident or integer"); 163 | break; 164 | } 165 | args[nargs++] = tk; 166 | } while (!pre_accept(ctx, ',')); 167 | pre_expect(ctx, ')'); 168 | 169 | struct parse_context tmp; 170 | parse_initialize(&tmp); 171 | parse_string(&tmp, d->body, LEX_FL_NEWLINE_TOKEN | LEX_FL_BACKSLASH_TOKEN | LEX_FL_FORCE_IDENT); 172 | 
while (1) 173 | { 174 | struct token* dt = parse_advance(&tmp); 175 | if (!dt || dt->type == TK_EOF) 176 | break; 177 | // printf( "tk = %.*s\n", dl, dbuf ); 178 | switch (dt->type) 179 | { 180 | case TK_IDENT: 181 | { 182 | int param_index = -1; 183 | for (int i = 0; i < d->numparameters; ++i) 184 | { 185 | if (!strcmp(d->parameters[i], dt->string)) 186 | { 187 | param_index = i; 188 | break; 189 | } 190 | } 191 | if (param_index != -1) 192 | { 193 | // printf( "%s is at %d\n", dt->string, param_index ); 194 | // printf( "replace = %d\n", args[param_index]->integer ); 195 | struct token* parm_token = args[param_index]; 196 | int dl = parm_token->end - parm_token->start; 197 | assert(dl > 0); 198 | const char* dbuf = &ctx->data[parm_token->start]; 199 | // TODO: FIXME should we push ' ' by hand? 200 | heap_string_push(preprocessed, ' '); // incase no space for ident 201 | heap_string_appendn(preprocessed, dbuf, dl); 202 | break; 203 | } 204 | } 205 | 206 | default: 207 | { 208 | int dl = dt->end - dt->start; 209 | assert(dl > 0); 210 | const char* dbuf = &d->body[dt->start]; 211 | // printf( "dt type=%s,%d\n", token_type_to_string( dt->type ), dt->type ); 212 | heap_string_appendn(preprocessed, dbuf, dl); 213 | } 214 | break; 215 | } 216 | } 217 | parse_cleanup(&tmp); 218 | // pre_expect(ctx, ')'); 219 | } 220 | else 221 | { 222 | heap_string_append(preprocessed, d->body); 223 | } 224 | } 225 | 226 | heap_string preprocess_file(const char* filename, const char** includepaths, int verbose, struct hash_map* defines, 227 | struct hash_map** defines_out); 228 | static int handle_token(struct pre_context* ctx, heap_string* preprocessed, struct token* tk, int* handled) 229 | { 230 | *handled = 0; 231 | switch (tk->type) 232 | { 233 | case TK_IDENT: 234 | { 235 | struct define_directive* d = find_identifier(ctx, pre_string(ctx)); 236 | if (d) 237 | { 238 | handle_define_ident(ctx, d, preprocessed); 239 | } 240 | else 241 | { 242 | // ident not handled/replaced, return 0 
and original ident buffer will be appended to the preprocessed 243 | // buffer 244 | return 0; 245 | } 246 | } 247 | break; 248 | 249 | case '#': 250 | pre_expect(ctx, TK_IDENT); 251 | const char* directive = pre_string(ctx); 252 | if (!strcmp(directive, "include")) 253 | { 254 | heap_string includepath = NULL; 255 | 256 | // printf( "got %s\n", pre_token( ctx )->string ); 257 | struct token* n = parse_advance(&ctx->parse_context); 258 | if (!pre_accept(ctx, '<') && !pre_accept(ctx, TK_STRING)) 259 | pre_error(ctx, "expected < or string"); 260 | 261 | if (n->type == '<') 262 | { 263 | while (1) 264 | { 265 | struct token* t = parse_token(&ctx->parse_context); 266 | if (!t) 267 | pre_error(ctx, "unexpected eof"); 268 | if (t->type == '>') 269 | { 270 | parse_advance(&ctx->parse_context); 271 | break; 272 | } 273 | append_token_buffer(ctx, &includepath, t); 274 | // printf("tk type = %s (%s)\n", token_type_to_string(t->type), t->string); 275 | parse_advance(&ctx->parse_context); 276 | } 277 | } 278 | else 279 | { 280 | // printf("tk type = %s (%s)\n", token_type_to_string(n->type), n->string); 281 | includepath = heap_string_new(n->string); 282 | } 283 | // printf("including '%s'\n", includepath); 284 | 285 | heap_string locatedincludepath = locate_include_file(ctx, includepath); 286 | struct hash_map* defines = NULL; 287 | heap_string includedata = preprocess_file(locatedincludepath ? 
locatedincludepath : includepath, 288 | ctx->includepaths, ctx->verbose, ctx->identifiers, &defines); 289 | // TODO: FIXME free current defines 290 | ctx->identifiers = defines; 291 | heap_string_free(&locatedincludepath); 292 | // heap_string includedata = locate_and_read_include_file(ctx, includepath); 293 | if (!includedata) 294 | { 295 | printf("failed to find include file '%s'\n", includepath); 296 | heap_string_free(&includepath); 297 | // pre_error(ctx, "include"); 298 | return 1; 299 | } 300 | 301 | heap_string_append(preprocessed, includedata); 302 | // heap_string_appendf(preprocessed, "%s", includedata); 303 | heap_string_free(&includedata); 304 | heap_string_free(&includepath); 305 | } 306 | else if (!strcmp(directive, "define")) 307 | { 308 | pre_expect(ctx, TK_IDENT); 309 | int ident_end = pre_token(ctx)->end; 310 | const char* ident = pre_string(ctx); 311 | struct define_directive d = { 312 | .identifier = heap_string_new(ident), .body = NULL, .function = 0, .numparameters = 0}; 313 | 314 | if (ctx->data[ident_end] == '(') 315 | { 316 | parse_advance(&ctx->parse_context); 317 | d.function = 1; 318 | do 319 | { 320 | pre_expect(ctx, TK_IDENT); 321 | d.parameters[d.numparameters++] = heap_string_new(pre_string(ctx)); 322 | } while (!pre_accept(ctx, ',')); 323 | pre_expect(ctx, ')'); 324 | } 325 | int bs = 0; 326 | while (1) 327 | { 328 | struct token* t = parse_token(&ctx->parse_context); 329 | if (!t) 330 | pre_error(ctx, "unexpected eof"); 331 | if (t->type == '\n') 332 | { 333 | if (!bs) 334 | { 335 | parse_advance(&ctx->parse_context); 336 | break; 337 | } 338 | bs = 0; 339 | } 340 | if (t->type == '\\') 341 | bs = 1; 342 | else 343 | append_token_buffer(ctx, &d.body, t); 344 | // TODO: FIXME free body 345 | // printf("tk type = %s (%s)\n", token_type_to_string(t->type), t->string); 346 | parse_advance(&ctx->parse_context); 347 | } 348 | if (!d.body) 349 | d.body = heap_string_new(""); 350 | hash_map_insert(ctx->identifiers, ident, d); 351 | // 
printf("defining %s, func = %d\n", ident, d.function); 352 | } 353 | else if (!strcmp(directive, "ifndef")) 354 | { 355 | pre_expect(ctx, TK_IDENT); 356 | int expr = find_identifier(ctx, pre_string(ctx)) == NULL ? 1 : 0; 357 | // heap_string_appendf(preprocessed, "// expr = %d\n", expr); 358 | ++ctx->scope_bit; 359 | ctx->scope_visibility |= (expr << ctx->scope_bit); 360 | } 361 | else if (!strcmp(directive, "ifdef")) 362 | { 363 | pre_expect(ctx, TK_IDENT); 364 | int expr = find_identifier(ctx, pre_string(ctx)) == NULL ? 0 : 1; 365 | // heap_string_appendf(preprocessed, "// expr = %d\n", expr); 366 | ++ctx->scope_bit; 367 | ctx->scope_visibility |= (expr << ctx->scope_bit); 368 | } 369 | else if (!strcmp(directive, "if")) 370 | { 371 | struct token* n = parse_advance(&ctx->parse_context); 372 | // TODO: FIXME make #if work with expressions 373 | if (!pre_accept(ctx, TK_INTEGER) && !pre_accept(ctx, TK_IDENT)) 374 | pre_error(ctx, "expected integer or ident"); 375 | int expr = (n->type == TK_INTEGER ? 
n->integer.value : (find_identifier(ctx, n->string) != NULL)) != 0; 376 | // heap_string_appendf(preprocessed, "// expr = %d\n", expr); 377 | ++ctx->scope_bit; 378 | ctx->scope_visibility |= (expr << ctx->scope_bit); 379 | } 380 | else if (!strcmp(directive, "undef")) 381 | { 382 | pre_expect(ctx, TK_IDENT); 383 | hash_map_remove_key(&ctx->identifiers, pre_string(ctx)); 384 | } 385 | break; 386 | 387 | default: 388 | *handled = 0; 389 | return 0; 390 | } 391 | *handled = 1; 392 | return 0; 393 | } 394 | 395 | heap_string preprocess(struct pre_context* ctx) 396 | { 397 | 398 | heap_string preprocessed = NULL; 399 | ctx->scope_bit = 0; 400 | ctx->scope_visibility = 1; // TODO: FIXME: max scopes 401 | 402 | while (1) 403 | { 404 | struct token* tk = parse_advance(&ctx->parse_context); 405 | if (!tk || tk->type == TK_EOF) 406 | break; 407 | if (tk->type == TK_IDENT && !strcmp(tk->string, "endif")) 408 | { 409 | assert(ctx->scope_bit > 0); 410 | --ctx->scope_bit; 411 | continue; 412 | } 413 | int in_scope = (ctx->scope_visibility & (1 << ctx->scope_bit)); 414 | if (!in_scope) 415 | continue; 416 | int handled; 417 | int err = handle_token(ctx, &preprocessed, tk, &handled); 418 | if (err) 419 | return NULL; 420 | if (handled) 421 | continue; 422 | int l = tk->end - tk->start; 423 | assert(l > 0); 424 | const char* buf = &ctx->data[tk->start]; 425 | // printf( "%.*s", l, buf ); 426 | // don't use appendf, has a hardcoded limit of 1024 at the time of writing this 427 | // heap_string_appendf(&preprocessed, "%.*s", l, buf); 428 | heap_string_appendn(&preprocessed, buf, l); 429 | // printf("tk type = %s (%s)\n", token_type_to_string(tk->type), tk->string); 430 | } 431 | return preprocessed; 432 | } 433 | 434 | struct hash_map* copy_definitions(struct hash_map* o) 435 | { 436 | assert(o); 437 | struct hash_map* n = hash_map_create(struct define_directive); 438 | // TODO: move this to rhd and name it something like iterate keys or entries 439 | for (size_t i = 0; i < 
o->bucket_size; ++i) 440 | { 441 | struct hash_bucket* bucket = &o->buckets[i]; 442 | if (bucket->head == NULL) 443 | continue; // skip.. empty bucket 444 | struct hash_bucket_entry* cur = bucket->head; 445 | while (cur != NULL) 446 | { 447 | struct define_directive* od = (struct define_directive*)cur->data; 448 | struct define_directive nd = {.identifier = heap_string_new(od->identifier), 449 | .body = heap_string_new(od->body), 450 | .function = od->function, 451 | .numparameters = od->numparameters}; 452 | hash_map_insert(n, cur->key, nd); 453 | cur = cur->next; 454 | } 455 | } 456 | return n; 457 | } 458 | 459 | heap_string preprocess_file(const char* filename, const char** includepaths, int verbose, struct hash_map* defines, 460 | struct hash_map** defines_out) 461 | { 462 | int success = 1; 463 | heap_string result_data = NULL; 464 | heap_string data = heap_string_read_from_text_file(filename); 465 | if (!data) 466 | return NULL; 467 | heap_string dir = filepath(filename); 468 | struct pre_context ctx = {.includes = linked_list_create(struct include_directive), 469 | .identifiers = 470 | defines ? 
#ifdef STANDALONE
/*
 * Standalone preprocessor driver.
 *
 * usage: <program> [-v] [-I<path>]... <source file>
 *
 * The last argument is the file to preprocess, everything before it must be
 * an option. -v turns on verbose output, -I<path> appends an include search
 * path behind the built-in "examples/include/". The preprocessed text is
 * written to stdout.
 *
 * Returns 0 on success and 1 on a usage error or when preprocessing failed.
 */
int main(int argc, char** argv)
{
	enum { MAX_INCLUDE_PATHS = 16 };

	int verbose = 0;
	const char* includepaths[MAX_INCLUDE_PATHS];
	int includepathindex = 0;
	includepaths[includepathindex++] = "examples/include/";
	includepaths[includepathindex] = NULL; // the list is NULL terminated

	if (argc < 2)
	{
		fprintf(stderr, "usage: %s [-v] [-I<path>]... <source file>\n", argc > 0 ? argv[0] : "pre");
		return 1;
	}

	int last_index = argc - 1;
	for (int i = 1; i < last_index; ++i)
	{
		if (argv[i][0] != '-')
		{
			fprintf(stderr, "unexpected argument '%s', options have to start with '-'\n", argv[i]);
			return 1;
		}
		switch (argv[i][1])
		{
			case 'v':
				verbose = 1;
				break;
			case 'I':
			{
				const char* includepath = (const char*)&argv[i][2];
				if (verbose)
					printf("include path: %s\n", includepath);
				// keep one slot free for the terminating NULL
				if (includepathindex + 1 >= MAX_INCLUDE_PATHS)
				{
					fprintf(stderr, "too many include paths (max %d)\n", MAX_INCLUDE_PATHS - 1);
					return 1;
				}
				includepaths[includepathindex++] = includepath;
				includepaths[includepathindex] = NULL;
			}
			break;
			default:
				// unknown options are ignored
				break;
		}
	}

	const char* source_filename = argv[last_index];
	if (verbose)
	{
		printf("src=%s\n", source_filename);
	}

	// argv[1] is only the source file when no options are given, so use the
	// last argument that was determined above.
	heap_string b = preprocess_file(source_filename, includepaths, verbose, NULL, NULL);
	if (!b)
		return 1;

	printf("%s\n", b);
	heap_string_free(&b);
	return 0;
}
#endif
) 34 | { 35 | //TODO: FIXME unsafe reentry etc 36 | char buffer[512] = { 0 }; 37 | int n = snprintf(buffer, sizeof(buffer), "[fixme:%s:%d] ", filename, linenumber); 38 | assert(n < sizeof(buffer)); 39 | 40 | va_list args; 41 | va_start( args, fmt ); 42 | vsnprintf( &buffer[n], sizeof( buffer ) - n, fmt, args ); 43 | //perror( buffer ); 44 | printf("%s", buffer); 45 | va_end( args ); 46 | } 47 | 48 | #define FIXME( fmt, ... ) \ 49 | do { \ 50 | FIXME_FN( __FILE__, __LINE__, fmt, ## __VA_ARGS__ ); \ 51 | } while(0) 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /test.c: -------------------------------------------------------------------------------- 1 | #define HEAP_STRING_IMPL 2 | #include "rhd/heap_string.h" 3 | 4 | #define LINKED_LIST_IMPL 5 | #include "rhd/linked_list.h" 6 | 7 | #define HASH_MAP_IMPL 8 | #include "rhd/hash_map.h" 9 | 10 | #include "rhd/hash_string.h" 11 | 12 | #include "std.h" 13 | #include "ast.h" 14 | #include "compile.h" 15 | #include "parse.h" 16 | #include "token.h" 17 | #include "arena.h" 18 | 19 | void resolve_calls(ast_node_t* head, ast_node_t *func) 20 | { 21 | traverse_context_t ctx = { 0 }; 22 | 23 | ast_node_t* results[32]; 24 | printf("%s\n", AST_NODE_TYPE_to_string(func->type)); 25 | size_t n = ast_tree_nodes_by_type(&ctx, func, AST_FUNCTION_CALL_EXPR, &results, COUNT_OF(results)); 26 | printf("numresults: %d\n", n); 27 | for (int i = 0; i < n; ++i) 28 | { 29 | ast_node_t* f = ast_tree_node_by_identifier(&ctx, head, results[i]->call_expr_data.callee->identifier_data.name, AST_FUNCTION_DECL); 30 | printf("\t%s\n", results[i]->call_expr_data.callee->identifier_data.name); 31 | if (f) 32 | resolve_calls(head, f); 33 | } 34 | } 35 | 36 | static int is_floating_point_type(int t) 37 | { 38 | return t == LITERAL_FLOAT || t == LITERAL_DOUBLE; 39 | } 40 | 41 | static int coerce_type(int a, int b) 42 | { 43 | if (a == b) 44 | return a; 45 | //both floating point return the bigger data 
type e.g double > float 46 | if (is_floating_point_type(a) && is_floating_point_type(b)) 47 | return a == LITERAL_DOUBLE || b == LITERAL_DOUBLE ? LITERAL_DOUBLE : LITERAL_FLOAT; 48 | //coerce + to float 49 | //same for double 50 | if (is_floating_point_type(a)) 51 | return a; 52 | return b; 53 | } 54 | 55 | static int ast_is_scope_node(ast_node_t* n) 56 | { 57 | return n->type == AST_PROGRAM || n->type == AST_FUNCTION_DECL || n->type == AST_BLOCK_STMT || n->type == AST_DO_WHILE_STMT || n->type == AST_WHILE_STMT || n->type == AST_FOR_STMT || n->type == AST_IF_STMT; 58 | } 59 | 60 | //can either be AST_PROGRAM, AST_FUNCTION_DECL, AST_BLOCK 61 | static ast_node_t *ast_find_scope_node(ast_node_t *head, ast_node_t* n) 62 | { 63 | traverse_context_t ctx = { 0 }; 64 | ast_tree_node_by_node(&ctx, head, n); 65 | //assumption that parent is always AST_PROGRAM 66 | for (int i = 1; i < ctx.numresults; ++i) 67 | { 68 | ast_node_t* it = ast_tree_traverse_get_visitee(&ctx, i); 69 | if (ast_is_scope_node(it)) 70 | return it; 71 | } 72 | return NULL; 73 | } 74 | 75 | static ast_node_t *ast_node_expression_type(ast_node_t *head, ast_node_t* n) 76 | { 77 | switch (n->type) 78 | { 79 | case AST_LITERAL: 80 | case AST_ENUM_VALUE: 81 | return n; 82 | case AST_EXPR_STMT: 83 | return ast_node_expression_type(head, n->expr_stmt_data.expr); 84 | case AST_IDENTIFIER: 85 | { 86 | //get current scope 87 | ast_node_t *scope = ast_find_scope_node(head, n); 88 | 89 | //find ident by name in tree 90 | traverse_context_t ctx = { 0 }; 91 | ast_node_t* variable_decl = ast_tree_node_by_identifier(&ctx, head, n->identifier_data.name, AST_VARIABLE_DECL); 92 | if(variable_decl) 93 | return variable_decl->variable_decl_data.data_type; 94 | return NULL; 95 | } break; 96 | case AST_BIN_EXPR: 97 | return coerce_type( 98 | ast_node_expression_type(head, n->bin_expr_data.lhs), 99 | ast_node_expression_type(head, n->bin_expr_data.rhs) 100 | ); 101 | case AST_ASSIGNMENT_EXPR: 102 | return coerce_type( 103 | 
ast_node_expression_type(head, n->assignment_expr_data.lhs), 104 | ast_node_expression_type(head, n->assignment_expr_data.rhs) 105 | ); 106 | } 107 | return -1; 108 | } 109 | 110 | int main(int argc, char** argv) 111 | { 112 | arena_t *arena; 113 | arena_create(&arena, "compiler", 1000 * 1000 * 32); //32MB 114 | 115 | compiler_t compile_ctx; 116 | compiler_init(&compile_ctx, arena, 64); 117 | 118 | ast_context_t ast_context; 119 | ast_init_context(&ast_context, arena); 120 | 121 | while (1) 122 | { 123 | //printf(">"); 124 | //TODO: find out which variables are standalone and don't depend on other variables and reorder the flow of the program to make better use of registers? 125 | //keep track of registers e.g RAX and see what it's used for and check whether it has served it's purpose to make it available again? 126 | //by scanning forward? evaluating then seeing where it get's popped into temp instruction buffer 127 | //e.g for syscall eax then after syscall clear it 128 | const char *code = "int main(){ int g = 3; int x = 9; int y = 5; int z = 6; int l = 23; int test = 5 + 9; test += 10; return 0; }"; 129 | //fgets(code, sizeof(code), stdin); 130 | if(code[0] == 'q') 131 | break; 132 | //printf("\n"); 133 | struct token* tokens = NULL; 134 | int num_tokens = 0; 135 | 136 | //static const char* code = "int a = 3 + 3;"; 137 | parse(code, &tokens, &num_tokens, LEX_FL_NONE); 138 | 139 | if(ast_process_tokens(&ast_context, tokens, num_tokens)) 140 | break; 141 | free(tokens); 142 | 143 | traverse_context_t ctx = { 0 }; 144 | void print_ast(struct ast_node *n, int depth); 145 | //print_ast(program_node, 0); 146 | 147 | #if 0 148 | ast_node_t* main_func = ast_tree_node_by_identifier(&ctx, head, "main", AST_FUNCTION_DECL); 149 | if (!main_func) 150 | { 151 | printf("no main function!\n"); 152 | } 153 | else 154 | { 155 | resolve_calls(head, main_func); 156 | } 157 | #endif 158 | int codegen(compiler_t* ctx, ast_node_t*); 159 | if(codegen(&compile_ctx, 
ast_context.program_node)) 160 | break; 161 | break; 162 | } 163 | //printf("%d KB/%d KB bytes used\n", arena->used/1000, arena->reserved/1000); 164 | arena_destroy(&arena); 165 | return 0; 166 | } -------------------------------------------------------------------------------- /tests/exit-code.c: -------------------------------------------------------------------------------- 1 | int main() 2 | { 3 | return 123; 4 | } 5 | -------------------------------------------------------------------------------- /tests/precedence.c: -------------------------------------------------------------------------------- 1 | int main() 2 | { 3 | return ((1 + 2 * 3 + 4 * 5 + 6 / 3) * 4 - 2) % 32; 4 | } 5 | -------------------------------------------------------------------------------- /tests/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cc="bin/ocean64" 3 | 4 | for f in tests/*.c; 5 | do 6 | filename="${f%.*}" 7 | $cc $f "$filename" 8 | done 9 | 10 | #for f in tests/*.test; 11 | #do 12 | # ./$f 13 | # echo $? 14 | #done 15 | 16 | check_return() 17 | { 18 | eval "tests/$1" 19 | retval=$? 
enum TOKEN_TYPE
{
	// ASCII characters reserved
	TK_IDENT = 256,
	TK_INTEGER,
	TK_STRING,
	TK_FLOAT,
	TK_DOUBLE,

	TK_PLUS_ASSIGN,
	TK_MINUS_ASSIGN,
	TK_MULTIPLY_ASSIGN,
	TK_DIVIDE_ASSIGN,

	TK_AND_ASSIGN,
	TK_OR_ASSIGN,
	TK_XOR_ASSIGN,
	TK_MOD_ASSIGN,

	TK_GEQUAL,
	TK_LEQUAL,

	TK_T_CHAR,
	TK_T_SHORT,
	TK_T_INT,
	TK_T_FLOAT,
	TK_T_DOUBLE,
	TK_T_NUMBER,
	TK_T_VOID,
	TK_T_LONG,
	TK_T_UNSIGNED,

	TK_CONST,
	TK_SIZEOF,

	TK_LSHIFT,
	TK_RSHIFT,
	TK_EQUAL,
	TK_NOT_EQUAL,
	TK_IF,
	TK_ELSE,
	TK_FOR,
	TK_WHILE,
	TK_DO,
	TK_RETURN,
	TK_BREAK,
	TK_LOOP,
	TK_EMIT,
	TK_DOT_THREE_TIMES,
	TK_PLUS_PLUS,
	TK_MINUS_MINUS,
	TK_STRUCT,
	TK_UNION,
	TK_TYPEDEF,
	TK_ARROW,
	TK_ENUM,

	TK_EOF,
	TK_MAX,
	TK_INVALID = -1
};

/* Display names indexed by token type; slots 0..255 (plain characters) stay NULL. */
static const char* token_type_strings[TK_MAX] = {[TK_IDENT] = "ident",
												 [TK_INTEGER] = "integer",
												 [TK_STRING] = "string",
												 [TK_FLOAT] = "float",
												 [TK_DOUBLE] = "double",
												 [TK_PLUS_ASSIGN] = "+=",
												 [TK_MINUS_ASSIGN] = "-=",
												 [TK_MULTIPLY_ASSIGN] = "*=",
												 [TK_DIVIDE_ASSIGN] = "/=",
												 [TK_AND_ASSIGN] = "&=",
												 [TK_OR_ASSIGN] = "|=",
												 [TK_XOR_ASSIGN] = "^=",
												 [TK_MOD_ASSIGN] = "%=",
												 [TK_GEQUAL] = ">=",
												 [TK_LEQUAL] = "<=",
												 [TK_T_CHAR] = "char",
												 [TK_T_SHORT] = "short",
												 [TK_T_INT] = "int",
												 [TK_T_FLOAT] = "float",
												 [TK_T_DOUBLE] = "double",
												 [TK_T_NUMBER] = "number",
												 [TK_T_VOID] = "void",
												 [TK_T_UNSIGNED] = "unsigned",
												 [TK_T_LONG] = "long",
												 [TK_CONST] = "const",
												 [TK_SIZEOF] = "sizeof",
												 [TK_LSHIFT] = "<<",
												 [TK_RSHIFT] = ">>",
												 [TK_EQUAL] = "==",
												 [TK_NOT_EQUAL] = "!=",
												 [TK_IF] = "if",
												 [TK_ELSE] = "else",
												 [TK_FOR] = "for",
												 [TK_WHILE] = "while",
												 [TK_DO] = "do",
												 [TK_RETURN] = "return",
												 [TK_BREAK] = "break",
												 [TK_LOOP] = "loop",
												 [TK_EMIT] = "emit",
												 [TK_DOT_THREE_TIMES] = "...",
												 [TK_PLUS_PLUS] = "++",
												 [TK_MINUS_MINUS] = "--",
												 [TK_STRUCT] = "struct",
												 [TK_UNION] = "union",
												 [TK_TYPEDEF] = "typedef",
												 [TK_ARROW] = "->",
												 [TK_ENUM] = "enum",
												 [TK_EOF] = "eof"};

/* Returns 1 when `type` is a printable ASCII character code (0x20..0x7e), else 0. */
static int is_token_printable(int type)
{
	if (type >= 0x20 && type <= 0x7e)
		return 1;
	return 0;
}

/*
 * Human-readable name for a token type. Never returns NULL.
 *
 * - printable ASCII types yield a one-character string
 * - types outside [0, TK_MAX) (including TK_INVALID) yield "invalid"
 *   instead of reading outside the table
 * - in-range types without a table entry yield "unknown"
 *
 * FIXME: not reentry safe; the one-character case returns a static buffer
 * that the next call overwrites.
 */
static const char* token_type_to_string(int type)
{
	static char buf[64] = {0};
	if (is_token_printable(type))
	{
		snprintf(buf, sizeof(buf), "%c", type);
		return buf;
	}
	if (type < 0 || type >= TK_MAX)
		return "invalid";
	const char *name = token_type_strings[type];
	return name ? name : "unknown";
}
/*
 * Read the whole file at `path` into a freshly malloc'ed buffer.
 *
 * On success returns 0 and stores the buffer in *pdata and its length in
 * *size; the caller owns the buffer and must free() it. An empty file
 * succeeds with *size == 0 and a valid (1-byte) allocation.
 *
 * Returns 1 if the file cannot be opened, 2 on a seek/tell/read failure and
 * 3 if the allocation fails. *pdata and *size are only written on success.
 */
int read_binary_file(const char* path, unsigned char** pdata, size_t* size)
{
	FILE *fp = fopen(path, "rb");
	if(!fp) return 1;

	int rc = 2;
	long length = -1;
	unsigned char *data = NULL;

	//determine the file length; any failure here is reported as an I/O error
	if(fseek(fp, 0, SEEK_END) == 0)
		length = ftell(fp);
	if(length < 0 || fseek(fp, 0, SEEK_SET) != 0)
		goto done;

	//malloc(0) may legitimately return NULL, so always request at least one byte
	data = malloc(length > 0 ? (size_t)length : 1);
	if(!data)
	{
		rc = 3;
		goto done;
	}
	if(fread(data, 1, (size_t)length, fp) != (size_t)length)
	{
		free(data);
		goto done;
	}
	*pdata = data;
	*size = (size_t)length;
	rc = 0;
done:
	fclose(fp);
	return rc;
}
0x7) 49 | return strings[type]; 50 | return "unknown"; 51 | } 52 | 53 | static void print_hex(u8 *buf, size_t n) 54 | { 55 | for (int i = 0; i < n; ++i) 56 | { 57 | printf("%02X%s", buf[i] & 0xff, i + 1 == n ? "" : " "); 58 | } 59 | } 60 | 61 | int main(int argc, char **argv) 62 | { 63 | assert(argc > 1); 64 | const char *file = argv[1]; 65 | unsigned char *data; 66 | size_t size; 67 | if(0 != read_binary_file(file, &data, &size)) 68 | { 69 | //printf("failed to read file '%s'\n", file); 70 | return 0; 71 | } 72 | 73 | //image + 24 = entry 74 | u64 *entry = data + 24; 75 | 76 | u16 *num_program_headers = data + 56; 77 | 78 | //printf("entry = %02X\n", *entry); 79 | //printf("num program headers = %d\n", *num_program_headers); 80 | size_t off = 0x40; 81 | struct phdr64 *hdr = NULL; 82 | for(int i = 0; i < *num_program_headers; ++i) 83 | { 84 | //find code 85 | hdr = (struct phdr64*)&data[off]; 86 | if(hdr->p_flags & PF_X) 87 | { 88 | break; 89 | } 90 | //printf("hdr type=%s\n",pt_string(hdr->p_type)); 91 | //printf("alignment=%d\n",hdr->p_align); 92 | //print_flags(hdr->p_flags); 93 | off += 0x38; 94 | } 95 | assert(hdr->p_flags & PF_X); 96 | //printf("code offset=%d,%d bytes\n",hdr->p_offset,hdr->p_filesz); 97 | print_hex(&data[hdr->p_offset], hdr->p_filesz); 98 | //printf("%d bytes\n", size); 99 | return 0; 100 | } 101 | -------------------------------------------------------------------------------- /types.h: -------------------------------------------------------------------------------- 1 | #ifndef TYPES_H 2 | #define TYPES_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | typedef int8_t i8; 9 | typedef int16_t i16; 10 | typedef int32_t i32; 11 | typedef int64_t i64; 12 | typedef uint8_t u8; 13 | typedef uint16_t u16; 14 | typedef uint32_t u32; 15 | typedef uint64_t u64; 16 | 17 | typedef enum 18 | { 19 | INTEGER_SUFFIX_NONE, 20 | INTEGER_SUFFIX_LONG, 21 | INTEGER_SUFFIX_LONG_LONG, 22 | INTEGER_SUFFIX_SIZE 23 | } integer_suffix_t; 24 | 25 | typedef struct 26 
| { 27 | union 28 | { 29 | i64 value; 30 | u64 unsigned_value; 31 | }; 32 | bool is_unsigned; 33 | integer_suffix_t suffix; 34 | 35 | } integer_t; 36 | 37 | typedef enum 38 | { 39 | SCALAR_SUFFIX_NONE, 40 | SCALAR_SUFFIX_FLOAT, 41 | SCALAR_SUFFIX_LONG_DOUBLE 42 | } scalar_suffix_t; 43 | 44 | typedef struct 45 | { 46 | long double value; 47 | scalar_suffix_t suffix; 48 | } scalar_t; 49 | 50 | #ifdef __GNUC__ 51 | #define PACK( __Declaration__ ) __Declaration__ __attribute__((__packed__)) 52 | #endif 53 | 54 | #ifdef _MSC_VER 55 | #define PACK( __Declaration__ ) __pragma( pack(push, 1) ) __Declaration__ __pragma( pack(pop)) 56 | #endif 57 | 58 | #endif 59 | -------------------------------------------------------------------------------- /util.h: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_H 2 | #define UTIL_H 3 | #include "rhd/heap_string.h" 4 | #include 5 | #include "types.h" 6 | 7 | static int dd(heap_string *s, uint32_t i) 8 | { 9 | int sz = heap_string_size( s ); 10 | union 11 | { 12 | uint32_t i; 13 | uint8_t b[4]; 14 | } u = { .i = i }; 15 | 16 | for ( size_t i = 0; i < 4; ++i ) 17 | heap_string_push( s, u.b[i] ); 18 | 19 | return sz; 20 | } 21 | 22 | static int dq(heap_string *s, uint64_t i) 23 | { 24 | int sz = heap_string_size( s ); 25 | union 26 | { 27 | uint64_t i; 28 | uint8_t b[8]; 29 | } u = { .i = i }; 30 | 31 | for ( size_t i = 0; i < 8; ++i ) 32 | heap_string_push( s, u.b[i] ); 33 | 34 | return sz; 35 | } 36 | 37 | static int dw( heap_string* s, uint16_t i ) 38 | { 39 | int sz = heap_string_size( s ); 40 | union 41 | { 42 | uint16_t s; 43 | uint8_t b[2]; 44 | } u = { .s = i }; 45 | 46 | heap_string_push( s, u.b[0] ); 47 | heap_string_push( s, u.b[1] ); 48 | return sz; 49 | } 50 | 51 | static int db(heap_string *s, u8 op) 52 | { 53 | heap_string_push(s, op); 54 | return heap_string_size(s) - 1; 55 | } 56 | 57 | static void pad(heap_string *s, u32 n) 58 | { 59 | for(int i = 0; i < n; ++i) 60 | 
/*
 * Alignment helper for `pos` relative to a multiple of `align`.
 *
 * Returns `pos` itself when it is already a multiple of `align`; otherwise
 * returns the number of bytes missing up to the next multiple.
 * `align` must be non-zero.
 *
 * NOTE(review): the two branches return different kinds of value (a position
 * vs. a padding amount) - confirm against the callers which one is intended.
 */
static int align_to(int pos, int align)
{
	const int remainder = pos % align;
	return remainder ? align - remainder : pos;
}
/*
 * printf-style trace output followed by a newline; compiled to a no-op
 * unless VERBOSE is defined.
 *
 * Formats straight to stdout, so there is no fixed-size intermediate buffer
 * and long messages are no longer truncated.
 */
static void verbose_printf(const char *format, ...)
{
#ifdef VERBOSE
	va_list va;
	va_start(va, format);
	vprintf(format, va);
	va_end(va);
	putchar('\n');
#else
	(void)format;
#endif
}
fetch_operand(vm); 90 | u.b[2] = fetch_operand(vm); 91 | u.b[3] = fetch_operand(vm); 92 | return u.value; 93 | } 94 | 95 | enum x86_vm_error_code 96 | { 97 | VM_OK, 98 | VM_HALT, 99 | VM_ERR_UNHANDLED_OPERAND, 100 | VM_ERR_INVALID_OPCODE, 101 | VM_ERR_UNHANDLED_SYSCALL 102 | }; 103 | 104 | static u8 *stack(vm_t *vm) 105 | { 106 | assert(vm->registers[ESP] >= vm->nreadonly); 107 | u8 *stack = get_reference_to_vm_memory_address(vm, vm->registers[ESP]); 108 | return stack; 109 | } 110 | 111 | static void push(vm_t *vm, int reg) 112 | { 113 | vm->registers[ESP] -= sizeof(regval_t); 114 | *(regval_t*)stack(vm) = vm->registers[reg]; 115 | } 116 | 117 | static regval_t pop(vm_t *vm, int reg) 118 | { 119 | vm->registers[reg] = *(regval_t*)stack(vm); 120 | vm->registers[ESP] += sizeof(regval_t); 121 | return vm->registers[reg]; 122 | } 123 | 124 | static void set_memory_value(vm_t *vm, int index, regval_t value) 125 | { 126 | //make sure it's writable 127 | assert(index >= vm->nreadonly); 128 | assert(index >= 0 && index < vm->memsz); 129 | vm->mem[index] = value; 130 | } 131 | 132 | static regval_t get_memory_value(vm_t *vm, int index) 133 | { 134 | assert(index >= 0 && index < vm->memsz); 135 | return vm->mem[index]; 136 | } 137 | 138 | static u8 *get_memory_pointer(vm_t *vm, int index) 139 | { 140 | assert(index >= 0 && index < vm->memsz); 141 | return &vm->mem[index]; 142 | } 143 | 144 | static void set_flags(vm_t *vm, regval_t result) 145 | { 146 | vm->registers[REGISTER_X86_FLAGS] = 0; 147 | if(result < 0) 148 | vm->registers[REGISTER_X86_FLAGS] |= X86_SIGN_FLAG; 149 | if(result == 0) 150 | vm->registers[REGISTER_X86_FLAGS] |= X86_ZERO_FLAG; 151 | } 152 | 153 | static void dump_vm_state(vm_t *vm) 154 | { 155 | verbose_printf("flags: %d\n", vm->registers[REGISTER_X86_FLAGS]); 156 | verbose_printf("registers:\n"); 157 | for(int i = 0; i < 8; ++i) 158 | { 159 | verbose_printf("\t%s: %d\n", register_x86_names[i], vm->registers[i]); 160 | } 161 | } 162 | 163 | static void 
cmp(vm_t *vm, regval_t a, regval_t b) 164 | { 165 | regval_t c = a - b; 166 | set_flags(vm, c); 167 | if((a > 0 && b > INT_MAX - a) || (a < 0 && b < INT_MIN - a)) //overflow or underflow 168 | vm->registers[REGISTER_X86_FLAGS] |= X86_OVERFLOW_FLAG; 169 | } 170 | 171 | int execute_vm(vm_t *vm) 172 | { 173 | int opcode = fetch_instruction(vm); 174 | switch(opcode) 175 | { 176 | //mov byte ptr [ebx], al 177 | case 0x88: 178 | { 179 | int operand = fetch_operand(vm); 180 | assert(operand == 0x03); 181 | u8 *ptr = (u8*)get_memory_pointer(vm, vm->registers[EBX]); 182 | *ptr = register_byte_value(vm->registers[EAX], 0); 183 | verbose_printf("mov byte ptr [ebx], al\n"); 184 | } break; 185 | 186 | case 0xf7: 187 | { 188 | int operand = fetch_operand(vm); 189 | switch(operand) 190 | { 191 | //not eax 192 | case 0xd0: 193 | verbose_printf("not eax\n"); 194 | vm->registers[EAX] = ~vm->registers[EAX]; 195 | break; 196 | 197 | //imul esi 198 | case 0xee: 199 | verbose_printf("imul esi\n"); 200 | vm->registers[EAX] *= vm->registers[ESI]; 201 | break; 202 | 203 | default: 204 | return VM_ERR_UNHANDLED_OPERAND; 205 | } 206 | } break; 207 | 208 | //add r32, r32 209 | case 0x1: 210 | { 211 | int operand = fetch_operand(vm); 212 | switch(operand) 213 | { 214 | //add [ebx],eax 215 | case 0x03: 216 | { 217 | regval_t *ptr = (regval_t*)get_memory_pointer(vm, vm->registers[EBX]); 218 | *ptr += vm->registers[EAX]; 219 | verbose_printf("add [ebx], eax\n"); 220 | } break; 221 | 222 | default: 223 | { 224 | if(operand < 0xc0 || operand > 0xff) 225 | return VM_ERR_INVALID_OPCODE; 226 | operand -= 0xc0; 227 | int dstreg = operand % 8; 228 | int srcreg = (operand - dstreg) / 8; 229 | vm->registers[dstreg] += vm->registers[srcreg]; 230 | verbose_printf("add %s, %s\n", register_x86_names[dstreg], register_x86_names[srcreg]); 231 | } break; 232 | } 233 | } break; 234 | 235 | //xor r32, r32 236 | case 0x31: 237 | { 238 | int operand = fetch_operand(vm); 239 | if(operand < 0xc0 || operand > 0xff) 240 
| return VM_ERR_INVALID_OPCODE; 241 | operand -= 0xc0; 242 | int dstreg = operand % 8; 243 | int srcreg = (operand - dstreg) / 8; 244 | vm->registers[dstreg] ^= vm->registers[srcreg]; 245 | verbose_printf("xor %s, %s\n", register_x86_names[dstreg], register_x86_names[srcreg]); 246 | } break; 247 | 248 | //sub r32, r32 249 | case 0x29: 250 | { 251 | int operand = fetch_operand(vm); 252 | if(operand < 0xc0 || operand > 0xff) 253 | return VM_ERR_INVALID_OPCODE; 254 | operand -= 0xc0; 255 | int dstreg = operand % 8; 256 | int srcreg = (operand - dstreg) / 8; 257 | vm->registers[dstreg] -= vm->registers[srcreg]; 258 | verbose_printf("sub %s, %s\n", register_x86_names[dstreg], register_x86_names[srcreg]); 259 | } break; 260 | 261 | case 0x89: 262 | { 263 | int operand = fetch_operand(vm); 264 | switch(operand) 265 | { 266 | //mov ebp, esp 267 | case 0xe5: 268 | { 269 | vm->registers[EBP] = vm->registers[ESP]; 270 | verbose_printf("mov ebp, esp\n"); 271 | } break; 272 | 273 | case 0xec: 274 | { 275 | vm->registers[ESP] = vm->registers[EBP]; 276 | verbose_printf("mov esp, ebp\n"); 277 | } break; 278 | 279 | case 0xd8: 280 | { 281 | vm->registers[EAX] = vm->registers[EBX]; 282 | verbose_printf("mov eax, ebx\n"); 283 | } break; 284 | 285 | case 0xc1: 286 | { 287 | vm->registers[ECX] = vm->registers[EAX]; 288 | verbose_printf("mov ecx, eax\n"); 289 | } break; 290 | 291 | case 0x03: 292 | { 293 | verbose_printf("mov [ebx], eax\n"); 294 | set_memory_value(vm, vm->registers[EBX], vm->registers[EAX]); 295 | } break; 296 | 297 | case 0xc3: 298 | { 299 | verbose_printf("mov ebx, eax\n"); 300 | vm->registers[EBX] = vm->registers[EAX]; 301 | } break; 302 | 303 | default: 304 | return VM_ERR_UNHANDLED_OPERAND; 305 | } 306 | } break; 307 | 308 | case 0x81: 309 | { 310 | int operand = fetch_operand(vm); 311 | switch(operand) 312 | { 313 | //add ebx, imm32 314 | case 0xc3: 315 | { 316 | regval_t value = fetch_register_value(vm); 317 | vm->registers[EBX] += value; 318 | 
verbose_printf("add ebx, 0x%x\n", value); 319 | } break; 320 | //sub esp, imm32 321 | case 0xec: 322 | { 323 | regval_t value = fetch_register_value(vm); 324 | vm->registers[ESP] -= value; 325 | verbose_printf("sub esp, 0x%x\n", value); 326 | } break; 327 | 328 | default: 329 | return VM_ERR_UNHANDLED_OPERAND; 330 | } 331 | } break; 332 | 333 | case 0xff: 334 | { 335 | int operand = fetch_operand(vm); 336 | switch(operand) 337 | { 338 | //call dword ptr 339 | case 0x15: 340 | { 341 | regval_t addr = fetch_register_value(vm); 342 | verbose_printf("call dword [0x%x]\n", addr); 343 | push(vm, EIP); //save our current instruction pointer, restore after ret 344 | //TODO: FIXME dereference value at operand's location and set EIP to that. 345 | //vm->registers[EIP] = fetch_register_value(vm); 346 | } break; 347 | 348 | //call eax 349 | case 0xd0: 350 | push(vm, EIP); 351 | vm->registers[EIP] = vm->registers[EAX]; 352 | verbose_printf("call eax\n"); 353 | break; 354 | 355 | //inc [r32] 356 | case 0x00: 357 | case 0x01: 358 | case 0x02: 359 | case 0x03: 360 | case 0x04: 361 | case 0x05: 362 | case 0x06: 363 | case 0x07: 364 | { 365 | regval_t *ptr = (regval_t*)get_memory_pointer(vm, vm->registers[operand]); 366 | *ptr += 1; 367 | } break; 368 | 369 | default: 370 | return VM_ERR_INVALID_OPCODE; 371 | } 372 | } break; 373 | 374 | //mov r32, imm32 375 | case 0xb8: 376 | case 0xb9: 377 | case 0xba: 378 | case 0xbb: 379 | case 0xbc: 380 | case 0xbd: 381 | case 0xbe: 382 | case 0xbf: 383 | { 384 | int reg = opcode - 0xb8; 385 | regval_t value = fetch_register_value(vm); 386 | verbose_printf("mov %s, 0x%x\n", register_x86_names[reg], value); 387 | vm->registers[reg] = value; 388 | } break; 389 | 390 | //inc r32 391 | case 0x40: 392 | case 0x41: 393 | case 0x42: 394 | case 0x43: 395 | case 0x44: 396 | case 0x45: 397 | case 0x46: 398 | case 0x47: 399 | { 400 | int reg = opcode - 0x40; 401 | verbose_printf("inc %s\n", register_x86_names[reg]); 402 | vm->registers[reg] += 1; 403 | } 
break; 404 | 405 | //push r32 406 | case 0x50: //eax 407 | case 0x51: //ecx 408 | case 0x52: //edx 409 | case 0x53: //ebx 410 | case 0x54: //esp 411 | case 0x55: //ebp 412 | case 0x56: //esi 413 | case 0x57: //edi 414 | { 415 | int reg = opcode - 0x50; 416 | verbose_printf("push %s\n", register_x86_names[reg]); 417 | push(vm, reg); 418 | } break; 419 | 420 | //ret 421 | case 0xc3: 422 | { 423 | verbose_printf("ret\n"); 424 | regval_t addr = pop(vm, EIP); 425 | vm->registers[EIP] = addr; 426 | } break; 427 | 428 | //call imm32 429 | case 0xe8: 430 | { 431 | regval_t addr = fetch_register_value(vm); 432 | verbose_printf("call 0x%x\n", addr); 433 | push(vm, EIP); 434 | vm->registers[EIP] += addr; 435 | } break; 436 | 437 | case 0x83: 438 | { 439 | int operand = fetch_operand(vm); 440 | switch(operand) 441 | { 442 | //cmp eax, rel8 443 | case 0xf8: 444 | { 445 | int rel8 = fetch_operand(vm); 446 | verbose_printf("cmp eax, 0x%x\n", rel8); 447 | cmp(vm, vm->registers[EAX], rel8); 448 | } break; 449 | 450 | //add esp, rel8 451 | case 0xc4: 452 | { 453 | int rel8 = fetch_operand(vm); 454 | verbose_printf("add esp, 0x%x\n", rel8); 455 | vm->registers[ESP] += rel8; 456 | } break; 457 | 458 | default: 459 | return VM_ERR_UNHANDLED_OPERAND; 460 | } 461 | } break; 462 | 463 | //cmp eax, ecx 464 | case 0x39: 465 | { 466 | int operand = fetch_operand(vm); 467 | assert(operand == 0xc8); 468 | verbose_printf("cmp eax, ecx\n"); 469 | int a = vm->registers[EAX]; 470 | int b = vm->registers[ECX]; 471 | cmp(vm, a, b); 472 | } break; 473 | 474 | case 0x8b: 475 | { 476 | int operand = fetch_operand(vm); 477 | switch(operand) 478 | { 479 | case 0x1b: 480 | { 481 | verbose_printf("mov ebx, [ebx]\n"); 482 | vm->registers[EBX] = get_memory_value(vm, vm->registers[EBX]); 483 | } break; 484 | 485 | case 0x3: 486 | verbose_printf("mov eax, [ebx]\n"); 487 | vm->registers[EAX] = get_memory_value(vm, vm->registers[EBX]); 488 | break; 489 | 490 | default: 491 | if(operand < 0x85 || operand > (0x85 
+ 64)) 492 | return VM_ERR_UNHANDLED_OPERAND; 493 | int reg = (operand - 0x85) / 8; 494 | regval_t offset = fetch_register_value(vm); 495 | verbose_printf("mov %s, [ebp + 0x%x]\n", register_x86_names[reg], offset); 496 | vm->registers[reg] = get_memory_value(vm, vm->registers[EBP] + offset); 497 | break; 498 | } 499 | } break; 500 | 501 | case 0x8d: 502 | { 503 | int operand = fetch_operand(vm); 504 | switch(operand) 505 | { 506 | //lea edx,[ebx] 507 | case 0x13: 508 | { 509 | verbose_printf("lea edx, [ebx]\n"); 510 | vm->registers[EDX] = vm->registers[EBX]; 511 | } break; 512 | 513 | //lea r32, [ebp + offset] 514 | default: 515 | { 516 | if(operand < 0x85 || operand > (0x85 + 64)) 517 | return VM_ERR_INVALID_OPCODE; 518 | int reg = (operand - 0x85) / 8; 519 | regval_t offset = fetch_register_value(vm); 520 | verbose_printf("lea %s, [ebp + 0x%x]\n", register_x86_names[reg], offset); 521 | vm->registers[reg] = vm->registers[EBP] + offset; 522 | } break; 523 | } 524 | } break; 525 | 526 | //int imm8 527 | case 0xcd: 528 | { 529 | int operand = fetch_operand(vm); 530 | switch(operand) 531 | { 532 | //linux x86 syscall 533 | case 0x80: 534 | { 535 | verbose_printf("int 0x80\n"); 536 | switch(vm->registers[EAX]) 537 | { 538 | //SYS_exit 539 | case 0x1: 540 | return VM_HALT; 541 | //SYS_write 542 | case 0x4: 543 | { 544 | //verbose_printf("write(%d, %d, %d)\n", vm->registers[EBX], vm->registers[ECX], vm->registers[EDX]); 545 | int write(int filedes, const void *buf, unsigned int nbyte); 546 | u8 *ecxbuf = get_memory_pointer(vm, vm->registers[ECX]); 547 | write(vm->registers[EBX], (void*)ecxbuf, vm->registers[EDX]); 548 | } break; 549 | default: 550 | return VM_ERR_UNHANDLED_SYSCALL; 551 | } 552 | } break; 553 | 554 | default: 555 | return VM_ERR_UNHANDLED_OPERAND; 556 | } 557 | } break; 558 | 559 | //pop r32 560 | case 0x58: //eax 561 | case 0x59: //ecx 562 | case 0x5a: //edx 563 | case 0x5b: //ebx 564 | case 0x5c: //esp 565 | case 0x5d: //ebp 566 | case 0x5e: //esi 567 
| case 0x5f: //edi 568 | { 569 | int reg = opcode - 0x58; 570 | verbose_printf("pop %s\n", register_x86_names[reg]); 571 | pop(vm, reg); 572 | } break; 573 | 574 | //jmp rel32 575 | case 0xe9: 576 | { 577 | regval_t rel32 = fetch_register_value(vm); 578 | verbose_printf("jmp %x (%d)\n", rel32 + 5, rel32 + 5); 579 | vm->registers[EIP] += rel32; 580 | } break; 581 | 582 | //test r32, r32 583 | case 0x85: 584 | { 585 | int operand = fetch_operand(vm); 586 | if(operand < 0xc0 || operand > 0xff) 587 | return VM_ERR_INVALID_OPCODE; 588 | operand -= 0xc0; 589 | int dstreg = operand % 8; 590 | int srcreg = (operand - dstreg) / 8; 591 | regval_t result = vm->registers[dstreg] & vm->registers[srcreg]; 592 | set_flags(vm, result); 593 | verbose_printf("test %s, %s\n", register_x86_names[dstreg], register_x86_names[srcreg]); 594 | } break; 595 | 596 | case 0x0f: 597 | { 598 | int operand = fetch_operand(vm); 599 | switch(operand) 600 | { 601 | default: 602 | return VM_ERR_UNHANDLED_OPERAND; 603 | 604 | case 0xb6: 605 | { 606 | operand = fetch_operand(vm); 607 | switch(operand) 608 | { 609 | case 0xc0: 610 | verbose_printf("movzx eax, al\n"); 611 | vm->registers[EAX] = register_byte_value(vm->registers[EAX], 0); 612 | break; 613 | 614 | case 0x03: 615 | verbose_printf("movzx eax, byte [ebx]\n"); 616 | vm->registers[EAX] = register_byte_value(get_memory_value(vm, vm->registers[EBX]), 0); 617 | break; 618 | } 619 | } break; 620 | 621 | //jz rel32 622 | case 0x84: 623 | { 624 | regval_t rel32 = fetch_register_value(vm); 625 | verbose_printf("jz %x (%d)\n", rel32 + 6, rel32 + 6); 626 | if((vm->registers[REGISTER_X86_FLAGS] & X86_ZERO_FLAG) == X86_ZERO_FLAG) 627 | vm->registers[EIP] += rel32; 628 | } break; 629 | } 630 | } break; 631 | 632 | //jge rel8 633 | case 0x7d: 634 | { 635 | int rel8 = fetch_operand(vm); 636 | verbose_printf("jge %x (%d)\n", rel8 + 2, rel8 + 2); 637 | int of = (vm->registers[REGISTER_X86_FLAGS] & X86_OVERFLOW_FLAG) == X86_OVERFLOW_FLAG; 638 | int sf = 
/*
 * Convert a single ASCII hexadecimal digit to its numeric value.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F'.
 * Any other character yields 0 (callers cannot distinguish it from '0').
 */
int dec(int c)
{
	if(c >= '0' && c <= '9')
		return c - '0';
	if(c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if(c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return 0;
}

/*
 * Parse a hexadecimal string such as "ff" or "0xff" into an int.
 *
 * If the string contains an 'x', parsing starts right after the first one,
 * so an optional "0x" prefix is skipped. Characters that are not hex digits
 * count as the digit 0. A NULL string yields 0.
 *
 * The value is accumulated in an unsigned integer one nibble at a time, so
 * inputs with 8 or more digits no longer shift by an amount >= the width of
 * int or shift into the sign bit (both undefined behavior); only the low
 * bits that fit in an unsigned int are kept.
 */
int hex2dec(const char *str)
{
	if(!str)
		return 0;

	const char *p = str;
	const char *x = strchr(p, 'x');
	if(x)
		p = x + 1;

	unsigned int sum = 0;
	for(; *p; ++p)
		sum = (sum << 4) | (unsigned int)dec((unsigned char)*p);
	return (int)sum;
}
vm.nreadonly = memidx; 720 | vm.registers[ESP] = 0xffff; 721 | while(1) 722 | { 723 | int err = execute_vm(&vm); 724 | if(err != 0) 725 | { 726 | if(err != VM_HALT) 727 | { 728 | printf("Error: %d\n", err); 729 | } 730 | break; 731 | } 732 | #ifdef VERBOSE 733 | //dump_vm_state(&vm); 734 | getchar(); 735 | #endif 736 | } 737 | return 0; 738 | } -------------------------------------------------------------------------------- /x64.c: -------------------------------------------------------------------------------- 1 | //TODO: implement all opcodes we'll be using so we can keep track of the registers and their values 2 | //TODO: replace our "real" registers with "virtual" registers 3 | //rasm2 -b 64 -d "$(gcc -w -g test.c compile.c ast.c lex.c parse.c x64.c && ./a.out)" 4 | 5 | #include "std.h" 6 | #include "token.h" 7 | #include "rhd/linked_list.h" 8 | #include 9 | #include 10 | #include "buffer_util.h" 11 | #include "compile.h" 12 | #include "codegen.h" 13 | 14 | static const char *x64_register_strings[] = {"AL","BL","CL","DL","AH","BH","CH","DH","AX","BX","CX","DX","EAX","ECX","EDX","EBX","ESP","EBP","ESI","EDI","R8B","R9B","R10B","R11B","R12B","R13B","R14B","R15B","R8W","R9W","R10W","R11W","R12W","R13W","R14W","R15W","R8D","R9D","R10D","R11D","R12D","R13D","R14D","R15D","RAX","RCX","RDX","RBX","RSP","RBP","RSI","RDI","R8","R9","R10","R11","R12","R13","R14","R15","XMM0","XMM1","XMM2","XMM3","XMM4","XMM5","XMM6","XMM7","YMM0","YMM1","YMM2","YMM3","YMM4","YMM5","YMM6","YMM7",NULL}; 15 | 16 | typedef enum 17 | { 18 | AL,BL,CL,DL, //lower 8-bit registers 19 | AH,BH,CH,DH, //upper 8-bit registers 20 | AX,BX,CX,DX, //16-bit registers 21 | EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI, //32-bit registers 22 | R8B,R9B,R10B,R11B,R12B,R13B,R14B,R15B, //lowermost 8-bits register 23 | R8W,R9W,R10W,R11W,R12W,R13W,R14W,R15W, //lowermost 16-bits register 24 | R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D, //lowermost 32-bits register 25 | RAX,RCX,RDX,RBX,RSP,RBP,RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15, 
//64-bit registers 26 | XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7, //SSE2 27 | YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7, //AVX 28 | X64_REGISTER_MAX 29 | } x64_register_t; 30 | 31 | //list of registers that overlap due to being lower N bits 32 | static const int x64_register_slots[][6] = { 33 | {RAX, EAX, AX, AL, AH, -1}, 34 | {RCX, ECX, CX, CL, CH, -1}, 35 | {RDX, EDX, DX, DL, DH, -1}, 36 | {RBX, EBX, BX, BL, BH, -1}, 37 | {RSP, ESP, -1}, 38 | {RBP, EBP, -1}, 39 | {RSI, ESI, -1}, 40 | {RDI, EDI, -1}, 41 | {R8, R8D, R8W, R8B, -1}, 42 | {R9, R9D, R9W, R9B, -1}, 43 | {R10, R10D, R10W, R10B, -1}, 44 | {R11, R11D, R11W, R11B, -1}, 45 | {R12, R12D, R12W, R12B, -1}, 46 | {R13, R13D, R13W, R13B, -1}, 47 | {R14, R14D, R14W, R14B, -1}, 48 | {R15, R15D, R15W, R15B, -1}, 49 | {XMM0, YMM0, -1}, 50 | {XMM1, YMM1, -1}, 51 | {XMM2, YMM2, -1}, 52 | {XMM3, YMM3, -1}, 53 | {XMM4, YMM4, -1}, 54 | {XMM5, YMM5, -1}, 55 | {XMM6, YMM6, -1}, 56 | {XMM7, YMM7, -1} 57 | }; 58 | 59 | static struct 60 | { 61 | int bits; 62 | reg_t registers[17]; 63 | } x64_register_bits[] = { 64 | {256, {YMM0,YMM1,YMM2,YMM3,YMM4,YMM5,YMM6,YMM7,-1}}, 65 | {128, {XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,-1}}, 66 | {64, {RAX,RCX,RDX,RBX,/*RSP,RBP,*/RSI,RDI,R8,R9,R10,R11,R12,R13,R14,R15,-1}}, 67 | {32, {EAX,ECX,EDX,EBX,/*ESP,EBP,*/ESI,EDI,R8D,R9D,R10D,R11D,R12D,R13D,R14D,R15D,-1}}, 68 | {16, {AX,BX,CX,DX,R8W,R9W,R10W,R11W,R12W,R13W,R14W,R15W,-1}}, 69 | {8, {AL,BL,CL,DL,AH,BH,CH,DH,R8B,R9B,R10B,R11B,R12B,R13B,R14B,R15B,-1}} 70 | }; 71 | 72 | typedef enum 73 | { 74 | CF = 1, 75 | PF = 0x4, 76 | AF = 0x10, 77 | ZF = 0x40, 78 | SF = 0x80, 79 | TP = 0x100, 80 | IF = 0x200, 81 | DF = 0x400, 82 | OF = 0x800 83 | } x64_flags_t; 84 | 85 | static int reg_count_bits(compiler_t *ctx, x64_register_t reg) 86 | { 87 | return ctx->cg.reginfo[reg].bits; 88 | } 89 | 90 | static x64_register_t lower_half_register_bits(x64_register_t reg) 91 | { 92 | if(reg >= AL && reg <= DH) //8-bits is the lowest we can go 93 | return -1; 94 | if(reg 
>= AX && reg <= DX) //TODO: add upper_half_register_bits if we wanted to access those 95 | return AL + (reg - AX); 96 | if(reg >= EAX && reg <= EAX) 97 | return AX + (reg - EAX); 98 | if(reg >= RAX && reg <= RDI) 99 | return EAX + (reg - RAX); 100 | //TODO: add the other registers aswell 101 | perror("invalid register for lower_half_register_bits"); 102 | return -1; 103 | } 104 | 105 | static void push(compiler_t *ctx, reg_t reg) 106 | { 107 | assert(reg >= RAX && reg <= RDI); 108 | db(ctx, 0x50 + (reg - RAX)); 109 | } 110 | 111 | static reg_t least_used_compatible_register(compiler_t *ctx, int bits) 112 | { 113 | reg_t *registers = NULL; 114 | for(int i = 0; i < COUNT_OF(x64_register_bits); ++i) 115 | { 116 | if(x64_register_bits[i].bits == bits) 117 | { 118 | registers = x64_register_bits[i].registers; 119 | break; 120 | } 121 | } 122 | assert(registers); 123 | reg_t reg = registers[0]; 124 | for(size_t i = 1; registers[i] != -1; ++i) 125 | { 126 | reginfo_t *info = &ctx->cg.reginfo[registers[i]]; 127 | if(info->usecount < ctx->cg.reginfo[reg].usecount) 128 | { 129 | reg = registers[i]; 130 | } 131 | } 132 | return reg; 133 | } 134 | 135 | //what if we have more virtual registers than real registers 136 | //then two virtual registers could be assigned to the same real register 137 | //maybe add a stack and keep track of which virtual register is currently used for which register 138 | //just like in the VREG map/unmap 139 | 140 | static reg_t map_reg(compiler_t *ctx, vreg_t vreg) 141 | { 142 | reg_t reg = -1; 143 | switch(vreg) 144 | { 145 | case VREG_ANY: 146 | case VREG64_ANY: 147 | reg = least_used_compatible_register(ctx, 64); 148 | break; 149 | case VREG32_ANY: 150 | reg = least_used_compatible_register(ctx, 32); 151 | break; 152 | case VREG16_ANY: 153 | reg = least_used_compatible_register(ctx, 16); 154 | break; 155 | case VREG8_ANY: 156 | reg = least_used_compatible_register(ctx, 8); 157 | break; 158 | 159 | case VREG_0: 160 | case VREG_1: 161 | case 
VREG_2: 162 | case VREG_3: 163 | reg = RAX + (vreg - VREG_0); 164 | break; 165 | case VREG64_0: 166 | case VREG64_1: 167 | case VREG64_2: 168 | case VREG64_3: 169 | reg = RAX + (vreg - VREG64_0); 170 | break; 171 | 172 | case VREG32_0: 173 | case VREG32_1: 174 | case VREG32_2: 175 | case VREG32_3: 176 | reg = EAX + (vreg - VREG32_0); 177 | break; 178 | 179 | case VREG_SP: 180 | reg = RSP; 181 | break; 182 | case VREG_BP: 183 | reg = RBP; 184 | break; 185 | } 186 | //printf("vreg=%s,reg=%d,%s\n",vreg_names[vreg],reg,x64_register_strings[reg]); 187 | //assert(reg >= RAX && reg <= RDI); 188 | if(ctx->cg.reginfo[reg].usecount > 0) 189 | { 190 | push(ctx, reg); 191 | } 192 | ++ctx->cg.reginfo[reg].usecount; 193 | return reg; 194 | } 195 | 196 | static void pop(compiler_t *ctx, reg_t reg) 197 | { 198 | assert(reg >= RAX && reg <= RDI); 199 | db(ctx, 0x58 + (reg - RAX)); 200 | } 201 | 202 | static void unmap_reg(compiler_t *ctx, reg_t reg) 203 | { 204 | if(ctx->cg.reginfo[reg].usecount > 1) 205 | { 206 | pop(ctx, reg); 207 | --ctx->cg.reginfo[reg].usecount; 208 | } 209 | } 210 | 211 | static void nop(compiler_t *ctx) 212 | { 213 | db(ctx, 0x90); 214 | } 215 | 216 | //static void add(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 217 | static void sub(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 218 | static void mod(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 219 | static void imul(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 220 | static void idiv(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 221 | static void add_imm8_to_r32(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 222 | static void add_imm32_to_r32(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 223 | static void inc(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 224 | static void neg(void) { printf("function '%s' is not 
implemented!", __FUNCTION__); } 225 | //static void sub_regn_imm32(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 226 | //static void xor(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 227 | static void and(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 228 | static void or(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 229 | static void int3(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 230 | static void invoke_syscall(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 231 | static void exit_instr(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 232 | //static void push(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 233 | //static void pop(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 234 | static void load_reg(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 235 | static void store_reg(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 236 | static void load_regn_base_offset_imm32(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 237 | static void ret(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 238 | static void indirect_call_imm32(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 239 | static void call_imm32(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 240 | static void call_r32(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 241 | //static void mov_r_imm32(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 242 | static void mov_r_string(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 243 | //static void mov(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 244 | static void cmp(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 245 | static void 
test(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 246 | static void if_beg(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 247 | static void if_else(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 248 | static void if_end(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 249 | static void jmp_begin(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 250 | static void jmp_end(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 251 | static void add_data(void) { printf("function '%s' is not implemented!", __FUNCTION__); } 252 | 253 | static void mov_r_imm32(compiler_t* ctx, reg_t reg, i32 imm, int* data_loc) 254 | { 255 | if(reg >= RAX && reg <= RDI) 256 | { 257 | db(ctx, 0x48); 258 | db(ctx, 0xb8 + (reg - RAX)); 259 | dd(ctx, imm); 260 | dd(ctx, 0); 261 | } else if(reg >= R8 && reg <= R15) 262 | { 263 | db(ctx, 0x49); 264 | db(ctx, 0xb8 + (reg - R8)); 265 | dd(ctx, imm); 266 | dd(ctx, 0); 267 | } else { 268 | perror("unhandled data_size"); 269 | } 270 | #if 0 271 | if(reg > EDI) 272 | { 273 | //only valid for RAX - RDI 274 | db(ctx, 0x48); 275 | db(ctx, 0xc7); 276 | db(ctx, 0xc0 + (reg - RAX)); 277 | } else if(reg < EAX) 278 | { 279 | //TODO: FIXME 16-bit unhandled 280 | perror("16-bit unhandled..."); 281 | } else 282 | { 283 | db(ctx, 0xb8 + (reg - EAX)); 284 | } 285 | dd(ctx, imm); 286 | #endif 287 | } 288 | 289 | static vreg_t mov(compiler_t* ctx, reg_t a, reg_t b) 290 | { 291 | //a and b must be same size register 292 | assert(a >= RAX && a <= RDI && b >= RAX && b <= RDI); 293 | db(ctx, 0x40); 294 | db(ctx, 0x89); 295 | db(ctx, 0xc0 + (b - RAX) * 8 + (a - RAX)); 296 | return a; 297 | } 298 | 299 | static vreg_t sub_regn_imm32(compiler_t* ctx, reg_t reg, i32 imm) 300 | { 301 | if(reg >= RAX && reg <= RDI) 302 | { 303 | db(ctx, 0x40); 304 | } 305 | db(ctx, 0x81); 306 | if(reg >= RAX && reg <= RDI) 307 | db(ctx, 0xe8 + (reg - RAX)); 308 | else 309 
| db(ctx, 0xe8 + (reg - EAX)); 310 | dd(ctx, imm); 311 | } 312 | 313 | static vreg_t xor(compiler_t* ctx, reg_t a, reg_t b) 314 | { 315 | int nb = reg_count_bits(ctx, a); 316 | assert(nb == reg_count_bits(ctx, b)); 317 | switch(nb) 318 | { 319 | case 32: 320 | db(ctx, 0x31); 321 | db(ctx, 0xc0 + (b - EAX) * 8 + (a - EAX)); 322 | break; 323 | case 64: 324 | assert(a >= RAX && a <= RDI); 325 | assert(b >= RAX && b <= RDI); 326 | db(ctx, 0x48); 327 | db(ctx, 0x31); 328 | db(ctx, 0xc0 + (b - RAX) * 8 + (a - RAX)); 329 | break; 330 | default: 331 | perror("unhandled xor"); 332 | break; 333 | } 334 | return a; 335 | } 336 | //for local variables 337 | //not sure whether ARM or non-x86 support this, but for now just fix x86/x64 338 | 339 | //movsx , [byte,word,dword,qword] [src + lv->offset] 340 | 341 | static void load_lvalue_from_register_address_plus_offset(compiler_t *ctx, reg_t dst, reg_t src, lvalue_t *lv) 342 | { 343 | //TODO: first byte replace with 0x49 344 | //but just redo it all and just use bitflags 345 | //https://staffwww.fullcoll.edu/aclifton/cs241/lecture-instruction-format.html 346 | switch(data_size) 347 | { 348 | case 1: 349 | if(dst >= RAX && dst <= RDI) 350 | { 351 | db(ctx, 0x48); 352 | db(ctx, 0x0f); 353 | db(ctx, 0xbe); 354 | db(ctx, 0x85 + (dst - RAX) * 8); 355 | } else if(dst >= R8 && dst <= R15) 356 | { 357 | db(ctx, 0x48); 358 | db(ctx, 0x0f); 359 | db(ctx, 0xbe); 360 | db(ctx, 0x85 + (dst - R8) * 8); 361 | } else { 362 | perror("unhandled data_size"); 363 | } 364 | dd(ctx, lv->offset); 365 | break; 366 | 367 | case 4: 368 | if(dst >= RAX && dst <= RDI) 369 | { 370 | db(ctx, 0x48); 371 | db(ctx, 0x63); 372 | db(ctx, 0x85 + (dst - RAX) * 8); 373 | } else if(dst >= R8 && dst <= R15) 374 | { 375 | db(ctx, 0x4c); 376 | db(ctx, 0x63); 377 | db(ctx, 0x85 + (dst - R8) * 8); 378 | } else { 379 | perror("unhandled data_size"); 380 | } 381 | dd(ctx, lv->offset); 382 | break; 383 | 384 | case 8: 385 | if(dst >= RAX && dst <= RDI) 386 | { 387 | db(ctx, 
0x48); 388 | db(ctx, 0x8b); 389 | db(ctx, 0x85 + (dst - RAX) * 8); 390 | } 391 | else if(dst >= R8 && dst <= R15) 392 | { 393 | db(ctx, 0x4c); 394 | db(ctx, 0x8b); 395 | db(ctx, 0x85 + (dst - R8) * 8); 396 | } else 397 | perror("unhandled data_size"); 398 | dd(ctx, lv->offset); 399 | break; 400 | 401 | case 2: 402 | //first xor, then mov [word] 403 | perror("unhandled data_size"); 404 | break; 405 | 406 | default: 407 | perror("unhandled data_size"); 408 | break; 409 | } 410 | } 411 | 412 | //e.g lea , [rbp + offset] 413 | static void load_lvalue_address_to_register(compiler_t *ctx, reg_t dest, lvalue_t *src) 414 | { 415 | //TODO: check offset type 416 | if(reg >= RAX && reg <= RDI) 417 | { 418 | db(ctx, 0x48); 419 | db(ctx, 0x8d); 420 | db(ctx, 0x85 + (dest - RAX)); 421 | } else if(reg >= R8 && reg <= R15) 422 | { 423 | db(ctx, 0x4c); 424 | db(ctx, 0x8d); 425 | db(ctx, 0x85 + (dest - R8)); 426 | } 427 | dd(ctx, src->offset); 428 | } 429 | 430 | static void store_value_offset_from_register_to_stack(compiler_t *ctx, reg_t reg, int offset, int data_size) 431 | { 432 | if(reg >= RAX && reg <= RDI) 433 | { 434 | db(ctx, 0x48); 435 | db(ctx, 0x89); 436 | db(ctx, 0x85 + (reg - RAX) * 8); 437 | } else if(reg >= R8 && reg <= R15) 438 | { 439 | db(ctx, 0x4c); 440 | db(ctx, 0x89); 441 | db(ctx, 0x85 + (reg - R8) * 8); 442 | } else { 443 | perror("unhandled data_size"); 444 | } 445 | dd(ctx, offset); 446 | } 447 | 448 | static int reg_is_new_64_bit(reg_t reg) 449 | { 450 | return reg >= R8 && reg <= R15; 451 | } 452 | 453 | static int reg_is_old_64_bit(reg_t reg) 454 | { 455 | return reg >= RAX && reg <= RDI; 456 | } 457 | 458 | static reg_t add(compiler_t* ctx, reg_t a, reg_t b) 459 | { 460 | assert(reg_count_bits(ctx, a) == reg_count_bits(ctx, b)); 461 | if(!reg_is_new_64_bit(a)) 462 | { 463 | if(!reg_is_new_64_bit(b)) 464 | { 465 | db(ctx, 0x48); 466 | db(ctx, 0x01); 467 | db(ctx, 0xc0 + (b - RAX) * 8 + (a - RAX)); 468 | } else 469 | { 470 | db(ctx, 0x4c); 471 | db(ctx, 
0x01); 472 | db(ctx, 0xc0 + (b - R8) * 8 + (a - RAX)); 473 | } 474 | } else { 475 | if(!reg_is_new_64_bit(b)) 476 | { 477 | db(ctx, 0x49); 478 | db(ctx, 0x01); 479 | db(ctx, 0xc0 + (b - RAX) * 8 + (a - R8)); 480 | } else 481 | { 482 | db(ctx, 0x4d); 483 | db(ctx, 0x01); 484 | db(ctx, 0xc0 + (b - R8) * 8 + (a - R8)); 485 | } 486 | } 487 | return a; 488 | } 489 | 490 | static const char *register_name(compiler_t *ctx, reg_t reg) 491 | { 492 | return x64_register_strings[reg]; 493 | } 494 | 495 | void codegen_x64(compiler_t *ctx) 496 | { 497 | codegen_t *cg = &ctx->cg; 498 | 499 | cg->numreginfo = X64_REGISTER_MAX; 500 | cg->reginfo = arena_alloc(ctx->allocator, sizeof(reginfo_t) * cg->numreginfo); 501 | 502 | for(int i = 0; i < COUNT_OF(x64_register_slots); ++i) 503 | { 504 | //printf("slot %d:", i); 505 | for(int j = 0; x64_register_slots[i][j] != -1; ++j) 506 | { 507 | //printf(" %s", register_name(ctx, x64_register_slots[i][j])); 508 | cg->reginfo[x64_register_slots[i][j]].slot = i; 509 | cg->reginfo[x64_register_slots[i][j]].name = register_name(ctx, x64_register_slots[i][j]); 510 | cg->reginfo[x64_register_slots[i][j]].id = x64_register_slots[i][j]; 511 | } 512 | //printf("\n"); 513 | } 514 | 515 | for(int i = 0; i < COUNT_OF(x64_register_bits); ++i) 516 | { 517 | //printf("bits %d:", x64_register_bits[i].bits); 518 | for(int j = 0; x64_register_bits[i].registers[j] != -1; ++j) 519 | { 520 | //printf(" %s", register_name(ctx, x64_register_bits[i].registers[j])); 521 | cg->reginfo[x64_register_bits[i].registers[j]].bits = x64_register_bits[i].bits; 522 | } 523 | //printf("\n"); 524 | } 525 | 526 | cg->map_register = map_reg; 527 | cg->unmap_register = unmap_reg; 528 | cg->register_name = register_name; 529 | 530 | cg->load_value_offset_from_stack_to_register = load_value_offset_from_stack_to_register; 531 | cg->store_value_offset_from_register_to_stack = store_value_offset_from_register_to_stack; 532 | 533 | cg->add = add; 534 | cg->sub = sub; 535 | cg->mod = 
mod; 536 | cg->imul = imul; 537 | cg->idiv = idiv; 538 | cg->add_imm8_to_r32 = add_imm8_to_r32; 539 | cg->add_imm32_to_r32 = add_imm32_to_r32; 540 | cg->inc = inc; 541 | cg->neg = neg; 542 | cg->sub_regn_imm32 = sub_regn_imm32; 543 | cg->xor = xor; 544 | cg->and = and; 545 | cg->or = or; 546 | cg->int3 = int3; 547 | cg->nop = nop; 548 | cg->invoke_syscall = invoke_syscall; 549 | cg->exit_instr = exit_instr; 550 | cg->push = push; 551 | cg->pop = pop; 552 | cg->load_reg = load_reg; 553 | cg->store_reg = store_reg; 554 | cg->load_regn_base_offset_imm32 = load_regn_base_offset_imm32; 555 | cg->ret = ret; 556 | cg->indirect_call_imm32 = indirect_call_imm32; 557 | cg->call_imm32 = call_imm32; 558 | cg->call_r32 = call_r32; 559 | cg->mov_r_imm32 = mov_r_imm32; 560 | cg->mov_r_string = mov_r_string; 561 | cg->mov = mov; 562 | cg->cmp = cmp; 563 | cg->test = test; 564 | cg->if_beg = if_beg; 565 | cg->if_else = if_else; 566 | cg->if_end = if_end; 567 | cg->jmp_begin = jmp_begin; 568 | cg->jmp_end = jmp_end; 569 | cg->add_data = add_data; 570 | } 571 | -------------------------------------------------------------------------------- /x86.c: -------------------------------------------------------------------------------- 1 | #include "compile.h" 2 | #include "imm.h" 3 | #include "operand.h" 4 | #include "virtual_opcodes.h" 5 | #include "util.h" 6 | 7 | static int32_t voperand_cast_i32(voperand_t *o) 8 | { 9 | assert(o->type == VOPERAND_IMMEDIATE); 10 | return imm_cast_int32_t(&o->imm); 11 | } 12 | 13 | typedef enum 14 | { 15 | EAX, 16 | ECX, 17 | EDX, 18 | EBX, 19 | ESP, 20 | EBP, 21 | ESI, 22 | EDI 23 | } X86_REGISTER; 24 | 25 | typedef enum 26 | { 27 | PUSH32r, 28 | PUSH32i8, 29 | PUSH32i32, 30 | PUSH32rmm 31 | } x86_instr_t; 32 | 33 | void add(heap_string *s, voperand_t *dst, voperand_t *src) 34 | { 35 | // TODO: build table 36 | // https://qbdi.readthedocs.io/en/stable/architecture_support.html 37 | // of e.g all the ADD specific instructions then match them by their 
operands (maybe fix VOPERAND types and add 38 | // float/double to that aswell, since it's based on size for now) then just loop through the mappings e.g 39 | // static map_t mappings[] = {{ADD32ri, VOPERAND_REGISTER, VOPERAND_IMMEDIATE32/VOPERAND_IMMEDIATE8}; 40 | switch(dst->type) 41 | { 42 | case VOPERAND_REGISTER: 43 | //TODO: 44 | break; 45 | } 46 | } 47 | 48 | void push(heap_string *s, voperand_t *op) 49 | { 50 | switch(op->type) 51 | { 52 | case VOPERAND_REGISTER: 53 | db(s, 0x50 + op->reg.index); 54 | break; 55 | case VOPERAND_IMMEDIATE: 56 | db(s, 0x68); 57 | dd(s, voperand_cast_i32(op)); 58 | break; 59 | default: 60 | perror("unhandled"); 61 | break; 62 | } 63 | } 64 | 65 | bool x86(function_t *f, heap_string *s) 66 | { 67 | for(size_t i = 0; i < f->instruction_index; ++i) 68 | { 69 | vinstr_t* instr = &f->instructions[i]; 70 | voperand_t* op = &instr->operands[0]; 71 | switch(instr->opcode) 72 | { 73 | case VOP_CALL: 74 | db(s, 0xe8); 75 | dd(s, 0x0); // TODO: replace 76 | break; 77 | 78 | case VOP_SUB: 79 | assert(instr->numoperands == 2); 80 | sub(s, &instr->operands[0], &instr->operands[1]); 81 | break; 82 | case VOP_MUL: 83 | assert(instr->numoperands == 2); 84 | mul(s, &instr->operands[0], &instr->operands[1]); 85 | break; 86 | case VOP_DIV: 87 | assert(instr->numoperands == 2); 88 | div(s, &instr->operands[0], &instr->operands[1]); 89 | break; 90 | case VOP_ADD: 91 | assert(instr->numoperands == 2); 92 | add(s, &instr->operands[0], &instr->operands[1]); 93 | break; 94 | 95 | case VOP_PUSH: 96 | push(s, op); 97 | break; 98 | 99 | case VOP_ALLOCA: 100 | { 101 | i32 numbytes = voperand_cast_i32(&instr->operands[0]); 102 | numbytes += 16 - (numbytes % 16); 103 | db(s, 0x81); 104 | db(s, 0xec); 105 | dd(s, numbytes); 106 | } 107 | break; 108 | case VOP_ENTER: 109 | db(s, 0x55); //push ebp 110 | db(s, 0x89); //mov ebp, esp 111 | db(s, 0xe5); 112 | break; 113 | case VOP_LEAVE: 114 | db(s, 0x5d); //pop ebp 115 | db(s, 0x89); //mov esp, ebp 116 | db(s, 
0xec); 117 | break; 118 | 119 | default: 120 | printf("unhandled opcode %s", vopcode_names[instr->opcode]); 121 | return false; 122 | } 123 | } 124 | return true; 125 | } 126 | --------------------------------------------------------------------------------