├── .gitignore ├── LICENSE ├── lib ├── asm2machine.sh ├── atoi_itoa.asm └── readme.txt ├── readme.txt ├── src ├── ceed.h ├── ceed.l ├── ceed.y ├── ceedcmpl.c ├── ceedelf.c ├── ceedpe.c ├── ceedrtl.c └── makefile └── tst ├── hello.e ├── loop.e └── math.e /.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.ko 4 | *.obj 5 | *.elf 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Libraries 12 | *.lib 13 | *.a 14 | *.la 15 | *.lo 16 | 17 | # Shared objects (inc. Windows DLLs) 18 | *.dll 19 | *.so 20 | *.so.* 21 | *.dylib 22 | 23 | # Executables 24 | *.exe 25 | *.out 26 | *.app 27 | *.i*86 28 | *.x86_64 29 | *.hex 30 | 31 | # Debug files 32 | *.dSYM/ 33 | *.su 34 | 35 | # VIM temporary 36 | *.swp 37 | *.swo 38 | *.un~ 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017, Pankaj Garg 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | -------------------------------------------------------------------------------- /lib/asm2machine.sh: -------------------------------------------------------------------------------- 1 | nasm -f elf atoi_itoa.asm 2 | rm -f machine.asm 3 | for i in $(objdump -d ./atoi_itoa.o |grep "^ " |cut -f2); do echo -n '\x'$i >> machine.asm; done; 4 | 5 | -------------------------------------------------------------------------------- /lib/atoi_itoa.asm: -------------------------------------------------------------------------------- 1 | BITS 32 2 | GLOBAL _start 3 | SECTION .text 4 | _start: 5 | itoa: 6 | 7 | ; eax must contain the number that needs to be converted. 8 | 9 | ; ebx must point to a buffer that would store the converted string. 10 | ; The buffer must have at least 12 bytes to contain maximum 4-byte number 11 | ; including minus sign. 
12 | 13 | push ebp 14 | mov ebp, esp 15 | sub esp, 4 ; Allocate 4 bytes for string length 16 | 17 | add ebx, 11 ; Mov to end of buffer 18 | mov byte [ebx], 0 ; Put \0 or NULL at the end 19 | mov ecx, 10 ; Divisor 20 | mov dword [ebp - 4], 0 ; ebp-4 will contain string length 21 | 22 | .checknegative: 23 | xor edi, edi 24 | cmp eax, 0 25 | jge .divloop 26 | neg eax 27 | mov edi, 1 28 | 29 | .divloop: 30 | xor edx, edx ; Zero out edx (remainder is in edx after idiv) 31 | idiv ecx ; Divide eax by ecx 32 | add edx, 0x30 ; Add 0x30 to the remainder to get ASCII value 33 | dec ebx ; Move the pointer backwards in the buffer 34 | mov byte [ebx], dl ; Move the character into the buffer 35 | inc dword [ebp - 4] ; Increase the length 36 | 37 | cmp eax, 0 ; Was the result zero? 38 | jnz .divloop ; No it wasn't, keep looping 39 | 40 | .minussign: 41 | cmp edi, 1 42 | jne .done 43 | 44 | dec ebx 45 | mov byte [ebx], 0x2d 46 | inc dword [ebp - 4] ; Increase the length 47 | 48 | .done: 49 | mov ecx, ebx ; ecx points to the beginning of the string 50 | mov edx, [ebp - 4] ; ebp-4 contains the length - move it into edx 51 | 52 | mov esp, ebp ; Clean up our stack 53 | pop ebp 54 | ret 55 | 56 | 57 | 58 | atoi: 59 | 60 | ; ebx must point to the input buffer that is NULL terminated. 
61 | 62 | push ebp 63 | mov ebp, esp 64 | sub esp, 4 65 | 66 | .init: 67 | ; Initialize variables and registers 68 | xor edi, edi ; Used as counter 69 | mov ecx, 10 ; Used as multiplier 70 | xor eax, eax ; Returns result 71 | xor edx, edx ; Temporary 72 | mov dword [ebp - 4], 0 ; Store result 73 | 74 | .checknegative: 75 | xor esi, esi ; Used to represent negative numbers 76 | mov dl, [ebx + edi] ; Select the character 77 | cmp dl, 0x2d ; Check if this is - sign 78 | jne .multiplyLoop 79 | mov esi, 1 ; esi = 1 represents negative numbers 80 | inc edi ; Move the buffer index past the - sign 81 | 82 | .multiplyLoop: 83 | mov eax, [ebp - 4] ; Get saved value 84 | mul ecx ; Make this value 10x (edx:eax = eax * ecx) 85 | jo .invalid ; Jump in case of overflow (upper half in edx is non-zero) 86 | mov dl, [ebx + edi] ; Select the character (rest of edx is 0 after a non-overflowing mul) 87 | cmp dl, 0x30 ; Validate character between 0-9 88 | jl .invalid ; Below '0' (signed compare, so bytes >= 0x80 are rejected as well) 89 | cmp dl, 0x39 90 | jg .invalid ; Above '9' 91 | sub dl, 0x30 ; Subtract ASCII 48 to get its actual value 92 | add eax, edx ; Add new value and 10x old value (this add is not checked for overflow) 93 | mov [ebp - 4], eax ; Save result 94 | inc edi ; Increase the counter 95 | cmp byte [ebx + edi], 0 ; Have we reached a null terminator? 96 | je .finish ; If yes, we are done. 97 | cmp byte [ebx + edi], 0xa ; Have we reached a newline character? 98 | je .finish ; If yes, we are done. 99 | cmp byte [ebx + edi], 0xd ; Did we reach a carriage return (in windows)? 100 | je .finish ; If yes, we are done. 
101 | jmp .multiplyLoop ; Loop back 102 | 103 | .finish: 104 | mov eax, [ebp - 4] ; Result in eax 105 | 106 | .minussign: 107 | cmp esi, 1 108 | jne .done 109 | neg eax 110 | jmp .done 111 | 112 | .invalid: 113 | xor eax, eax 114 | 115 | .done: 116 | mov esp, ebp ; clean up our stack 117 | pop ebp 118 | ret 119 | 120 | -------------------------------------------------------------------------------- /lib/readme.txt: -------------------------------------------------------------------------------- 1 | The lib folder provides asm implementations of helper routines that are compiled 2 | into machine code using nasm and injected into Ceed target binaries. Currently, 3 | only two functions are supported: atoi and itoa. 4 | 5 | - Use asm2machine.sh to convert the code in atoi_itoa.asm into machine code. 6 | - Copy this machine code into ceedrtl.c in the respective function. 7 | 8 | -------------------------------------------------------------------------------- /readme.txt: -------------------------------------------------------------------------------- 1 | Ceed 2 | ---- 3 | Ceed is a tiny open source compiler for x86 Linux and Windows. It can compile 4 | high-level language source code into an ELF or PE executable. The design and 5 | implementation of Ceed is described at: http://logicpundit.com/blog/ceed 6 | 7 | 8 | Folders 9 | ------- 10 | src - Contains source code for Ceed compiler 11 | tst - Contains some test programs for Ceed compiler 12 | lib - Contains asm code for functions injected in Ceed output 13 | 14 | 15 | Compile 16 | ------- 17 | // Linux or Cygwin 18 | cd src 19 | make ceed 20 | 21 | 22 | Usage 23 | ------- 24 | To create ELF executable: ./ceed < input_file 25 | To create PE executable: ./ceed -pe < input_file 26 | 27 | 28 | Example 29 | ------- 30 | ./ceed < math.e 31 | chmod +x a.exe or chmod +x a.out to make the file executable. 32 | On Windows, even if you produce a.exe in cygwin, you can only run it in the cmd 33 | prompt. If you run the output file in cygwin, it won't work. 
34 | 35 | 36 | Notes 37 | ----- 38 | - Only 32-bit Linux and Windows are supported 39 | - Code generated by Ceed is sub-optimal and may contain bugs. Only use for 40 | learning purpose. 41 | 42 | -------------------------------------------------------------------------------- /src/ceed.h: -------------------------------------------------------------------------------- 1 | /*++ 2 | 3 | Copyright (c) 2017, Pankaj Garg 4 | All rights reserved. 5 | 6 | This software may be modified and distributed under the terms of the BSD 7 | license. See the LICENSE file for details. 8 | 9 | --*/ 10 | typedef unsigned char u8, *pu8; 11 | typedef unsigned short u16, *pu16; 12 | typedef unsigned int u32, *pu32; 13 | typedef unsigned long long u64, *pu64; 14 | typedef char s8, *ps8; 15 | typedef short s16, *ps16; 16 | typedef long s32, *ps32; 17 | typedef long long s64, *ps64; 18 | typedef void *pvoid; 19 | 20 | typedef enum _sym_typ { 21 | sym_typ_const, 22 | sym_typ_ident, 23 | sym_typ_stmt 24 | } sym_typ; 25 | 26 | // Constants 27 | typedef struct _sym_const { 28 | int value; 29 | } sym_const; 30 | 31 | // Identifiers 32 | typedef struct _sym_ident { 33 | int index; 34 | } sym_ident; 35 | 36 | // Expressions 37 | typedef struct _sym_stmt { 38 | int type; 39 | int sym_count; 40 | struct _sym *sym_list[1]; 41 | } sym_stmt; 42 | 43 | typedef struct _sym { 44 | sym_typ type; 45 | union { 46 | sym_const con; 47 | sym_ident ident; 48 | sym_stmt stmt; 49 | }; 50 | } sym; 51 | 52 | extern int func[26]; 53 | 54 | u32 emit_code(sym *p); 55 | void emit8(u8 i); 56 | void emit16(u16 i); 57 | void emit32(u32 i); 58 | 59 | #define round_up(n, r) ((((n) + ((r)-1))/(r))*(r)) 60 | 61 | u32 add_str(pu8 str); 62 | void cmplr_init(); 63 | void elf_init(); 64 | void pe_init(); 65 | 66 | typedef void (*pfn_gen_exe_file)(pvoid ei); 67 | typedef pvoid (*pfn_set_exe_scn)(pvoid ei, pu8 scn_data, u32 scn_size); 68 | typedef pvoid (*pfn_get_va)(pvoid ei); 69 | typedef void (*pfn_emit_main_init)(); 70 | typedef 
void (*pfn_emit_main_exit)(); 71 | typedef void (*pfn_emit_write)(u32 buf_addr, u32 buf_len); 72 | typedef void (*pfn_emit_write_reg_input)(); 73 | typedef void (*pfn_emit_read)(u32 buf_addr, u32 buf_len); 74 | 75 | extern pvoid ei; 76 | extern pfn_gen_exe_file gen_exe_file; 77 | extern pfn_set_exe_scn set_exe_code_scn; 78 | extern pfn_set_exe_scn set_exe_rdata_scn; 79 | extern pfn_get_va get_code_va; 80 | extern pfn_get_va get_data_va; 81 | extern pfn_get_va get_rdata_va; 82 | extern pfn_emit_main_init emit_main_init; 83 | extern pfn_emit_main_exit emit_main_exit; 84 | extern pfn_emit_write emit_write; 85 | extern pfn_emit_write_reg_input emit_write_reg_input; 86 | extern pfn_emit_read emit_read; 87 | 88 | -------------------------------------------------------------------------------- /src/ceed.l: -------------------------------------------------------------------------------- 1 | /*++ 2 | 3 | Copyright (c) 2017, Pankaj Garg 4 | All rights reserved. 5 | 6 | This software may be modified and distributed under the terms of the BSD 7 | license. See the LICENSE file for details. 8 | 9 | --*/ 10 | %{ 11 | #include <stdio.h> 12 | #include <stdlib.h> 13 | #include "ceed.h" 14 | #include "ceed.tab.h" 15 | void yyerror(char *); 16 | %} 17 | 18 | %option yylineno 19 | %option noyywrap 20 | %x comment 21 | 22 | %% 23 | 24 | "/*" BEGIN comment; 25 | <comment>. 
; 26 | <comment>\n ; 27 | <comment>"*/" BEGIN 0; 28 | 29 | [a-z] { 30 | yylval.index = *yytext - 'a'; 31 | return VAR_LINT; 32 | } 33 | 34 | [A-Z] { 35 | yylval.index = yytext[0] - 'A' + 26; 36 | return VAR_GINT; 37 | } 38 | 39 | _[a-z] { 40 | yylval.index = yytext[1] - 'a' + 52; 41 | return FN_NAME; 42 | } 43 | 44 | 0 { 45 | yylval.value = atoi(yytext); 46 | return INT; 47 | } 48 | 49 | [1-9][0-9]* { 50 | yylval.value = atoi(yytext); 51 | return INT; 52 | } 53 | 54 | \"[^"\n]*\" { /* Stop at the first closing quote so that two string literals on one line stay separate tokens. */ 55 | yylval.value = add_str(&yytext[1]); 56 | return STR; 57 | } 58 | 59 | [-()>=+;{}] { 60 | return *yytext; 61 | } 62 | 63 | "==" return EQ; 64 | "if" return IF; 65 | "el" return ELSE; 66 | "fn" return FUNCTION; 67 | "br" return BREAK; 68 | "lo" return LOOP; 69 | "ws" return WRITE_STR; 70 | "wi" return WRITE_INT; 71 | "ri" return READ_INT; 72 | "nl" return WRITE_NEWLINE; 73 | 74 | [ \t\n]+ ; /* ignore whitespace */ 75 | 76 | . { 77 | printf("Line: %d, Error: Unknown character '%s'\n", 78 | yylineno, yytext); 79 | exit(-1); 80 | } 81 | 82 | %% 83 | 84 | -------------------------------------------------------------------------------- /src/ceed.y: -------------------------------------------------------------------------------- 1 | /*++ 2 | 3 | Copyright (c) 2017, Pankaj Garg 4 | All rights reserved. 5 | 6 | This software may be modified and distributed under the terms of the BSD 7 | license. See the LICENSE file for details. 
8 | 9 | --*/ 10 | %{ 11 | #include 12 | #include 13 | #include 14 | 15 | #include "ceed.h" 16 | 17 | #define YYDEBUG 1 18 | 19 | /* prototypes */ 20 | sym *mk_stmt(int type, int sym_count, ...); 21 | sym *mk_ident(int i); 22 | sym *mk_const(int value); 23 | void free_sym(sym *p); 24 | int yylex(void); 25 | void gen_exe(void); 26 | extern int yylineno; 27 | void yyerror(char *s); 28 | int func[26]; 29 | %} 30 | 31 | %define parse.error verbose 32 | 33 | %union { 34 | u32 value; /* Constant value */ 35 | u32 index; /* Identifier index */ 36 | sym *symbol; /* Symbol pointer */ 37 | }; 38 | 39 | %token INT 40 | %token STR 41 | %token VAR_LINT 42 | %token VAR_GINT 43 | %token FN_NAME 44 | %token LOOP IF WRITE_STR WRITE_INT READ_INT WRITE_NEWLINE 45 | %token FUNCTION 46 | %token BREAK 47 | %token FN_DEF 48 | %token FN_CALL 49 | %nonassoc IFX 50 | %nonassoc ELSE 51 | 52 | %left EQ '>' 53 | %left '+' '-' 54 | %nonassoc UMINUS 55 | 56 | %type str expr stmt compound_statement stmt_list func code code_block program 57 | 58 | %% 59 | 60 | program: 61 | code { emit_code($1); free_sym($1); } 62 | ; 63 | 64 | code: 65 | code_block { $$ = $1; } 66 | | code code_block { $$ = mk_stmt(';', 2, $1, $2); } 67 | ; 68 | 69 | code_block: 70 | func { $$ = $1; } 71 | /* Remove support for statements outside of functions. 
*/ 72 | /* | stmt { $$ = $1; } */ 73 | ; 74 | 75 | func: 76 | FN_NAME compound_statement { $$ = mk_stmt(FN_DEF, 2, mk_ident($1), $2); } 77 | ; 78 | 79 | compound_statement 80 | : '{' '}' { $$ = mk_stmt(';', 2, NULL, NULL); } 81 | | '{' stmt_list '}' { $$ = $2; } 82 | ; 83 | 84 | stmt_list: 85 | stmt { $$ = $1; } 86 | | stmt_list stmt { $$ = mk_stmt(';', 2, $1, $2); } 87 | ; 88 | 89 | stmt: 90 | ';' { $$ = mk_stmt(';', 2, NULL, NULL); } 91 | | expr ';' { $$ = $1; } 92 | | WRITE_STR '(' str ')' ';' { $$ = mk_stmt(WRITE_STR, 1, $3); } 93 | | WRITE_INT '(' expr ')' ';' { $$ = mk_stmt(WRITE_INT, 1, $3); } 94 | | WRITE_NEWLINE '(' ')' ';' { $$ = mk_stmt(WRITE_NEWLINE, 0); } 95 | | VAR_LINT '=' expr ';' { $$ = mk_stmt('=', 2, mk_ident($1), $3); } 96 | | VAR_GINT '=' expr ';' { $$ = mk_stmt('=', 2, mk_ident($1), $3); } 97 | | LOOP '(' expr ')' stmt { $$ = mk_stmt(LOOP, 2, $3, $5); } 98 | | IF '(' expr ')' stmt %prec IFX { $$ = mk_stmt(IF, 2, $3, $5); } 99 | | IF '(' expr ')' stmt ELSE stmt { $$ = mk_stmt(IF, 3, $3, $5, $7); } 100 | | compound_statement { $$ = $1; } 101 | ; 102 | 103 | expr: 104 | INT { $$ = mk_const($1); } 105 | | VAR_LINT { $$ = mk_ident($1); } 106 | | VAR_GINT { $$ = mk_ident($1); } 107 | | expr '+' expr { $$ = mk_stmt('+', 2, $1, $3); } 108 | | expr '-' expr { $$ = mk_stmt('-', 2, $1, $3); } 109 | | expr '>' expr { $$ = mk_stmt('>', 2, $1, $3); } 110 | | expr EQ expr { $$ = mk_stmt(EQ, 2, $1, $3); } 111 | | '(' expr ')' { $$ = $2; } 112 | | READ_INT '(' ')' { $$ = mk_stmt(READ_INT, 0); } 113 | | FN_NAME '(' ')' { $$ = mk_stmt(FN_CALL, 1, mk_ident($1)); } 114 | ; 115 | 116 | str: 117 | STR { $$ = mk_const($1); } 118 | ; 119 | 120 | %% 121 | 122 | sym* 123 | mk_const(int value) 124 | { 125 | sym *p; 126 | 127 | if ((p = malloc(sizeof(sym))) == NULL) 128 | yyerror("out of memory"); 129 | 130 | p->type = sym_typ_const; 131 | p->con.value = value; 132 | 133 | return p; 134 | } 135 | 136 | sym* 137 | mk_ident(int i) 138 | { 139 | sym *p; 140 | 141 | if ((p = 
malloc(sizeof(sym))) == NULL) 142 | yyerror("out of memory"); 143 | 144 | p->type = sym_typ_ident; 145 | p->ident.index = i; 146 | 147 | return p; 148 | } 149 | 150 | sym* 151 | mk_stmt(int type, int sym_count, ...) 152 | { 153 | va_list ap; 154 | sym *p; 155 | int i; 156 | 157 | if ((p = malloc(sizeof(sym) + (sym_count-1) * sizeof(sym *))) == NULL) 158 | yyerror("out of memory"); 159 | 160 | p->type = sym_typ_stmt; 161 | p->stmt.type = type; 162 | p->stmt.sym_count = sym_count; 163 | va_start(ap, sym_count); 164 | for (i = 0; i < sym_count; i++) { 165 | p->stmt.sym_list[i] = va_arg(ap, sym*); 166 | } 167 | va_end(ap); 168 | 169 | return p; 170 | } 171 | 172 | void 173 | free_sym(sym *p) 174 | { 175 | int i; 176 | 177 | if (!p) return; 178 | if (p->type == sym_typ_stmt) { 179 | for (i = 0; i < p->stmt.sym_count; i++) 180 | free_sym(p->stmt.sym_list[i]); 181 | } 182 | free (p); 183 | } 184 | 185 | void 186 | yyerror(char *s) 187 | { 188 | fprintf(stdout, "Line: %d, Error: %s\n", yylineno, s); 189 | } 190 | 191 | int 192 | main(int argc, char *argv[]) 193 | { 194 | func[0] = -1; 195 | 196 | if (argc == 1) { 197 | elf_init(); 198 | } else if (argc == 2 && (strcmp(argv[1], "-pe") == 0)) { 199 | pe_init(); 200 | } else { 201 | printf("Invalid command line. Valid syntax is:\n"); 202 | printf("For ELF output: ceed < input_file\n"); 203 | printf("For PE output: ceed -pe < input_file\n"); 204 | exit(-1); 205 | } 206 | 207 | cmplr_init(); 208 | 209 | if (yyparse() == 0) { 210 | gen_exe(); 211 | return 0; 212 | } else { 213 | return -1; 214 | } 215 | } 216 | 217 | -------------------------------------------------------------------------------- /src/ceedcmpl.c: -------------------------------------------------------------------------------- 1 | /*++ 2 | 3 | Copyright (c) 2017, Pankaj Garg 4 | All rights reserved. 5 | 6 | This software may be modified and distributed under the terms of the BSD 7 | license. See the LICENSE file for details. 
8 | 9 | --*/ 10 | #include 11 | #include 12 | #include "ceed.h" 13 | #include "ceed.tab.h" 14 | 15 | #define CEED_MAX_CODE_SIZE 0x100000 // 1MB 16 | #define CEED_MAX_RDATA_SIZE 0x100000 // 1MB 17 | static u32 code_pos = 0; 18 | static pu8 code; 19 | static u32 rdata_pos = 0; 20 | pu8 rdata; 21 | extern u8 itoa_code[]; 22 | extern u8 atoi_code[]; 23 | 24 | pvoid ei; 25 | pfn_gen_exe_file gen_exe_file; 26 | pfn_set_exe_scn set_exe_code_scn; 27 | pfn_set_exe_scn set_exe_rdata_scn; 28 | pfn_get_va get_code_va; 29 | pfn_get_va get_data_va; 30 | pfn_get_va get_rdata_va; 31 | pfn_emit_main_init emit_main_init; 32 | pfn_emit_main_exit emit_main_exit; 33 | pfn_emit_write emit_write; 34 | pfn_emit_write_reg_input emit_write_reg_input; 35 | pfn_emit_read emit_read; 36 | 37 | #define CEED_OP_ADD 0 38 | #define CEED_OP_SUB 1 39 | #define CEED_OP_GT 0 40 | #define CEED_OP_EQ 1 41 | 42 | #define CEED_TYPE_VAL 1 43 | #define CEED_TYPE_ADDR 2 44 | 45 | // 46 | // Reserve some temp space needed for itoa or atoi. 47 | // 48 | #define CEED_TEMP_ATOI_ITOA_BUF_ADDR 0x900 49 | // 50 | // Need at least 13 bytes on windows to accomodate: -2,147,483,648 followed 51 | // by CR-LF, which are appended on ReadConsole in Windows. We make length 16 52 | // bytes. 
53 | // 54 | #define CEED_TEMP_ATOI_ITOA_BUF_LEN 0x10 55 | 56 | void 57 | chk_code_size(u32 size) 58 | { 59 | if ((size + code_pos) > CEED_MAX_CODE_SIZE) { 60 | printf("Error: Exceeded maximum CODE size of %d bytes.\n", 61 | CEED_MAX_CODE_SIZE); 62 | exit(-1); 63 | } 64 | } 65 | 66 | void 67 | emit8(u8 i) 68 | { 69 | chk_code_size(1); 70 | memcpy(&code[code_pos], &i, 1); 71 | code_pos++; 72 | } 73 | 74 | void 75 | emit16(u16 i) 76 | { 77 | chk_code_size(2); 78 | memcpy(&code[code_pos], &i, 2); 79 | code_pos += 2; 80 | } 81 | 82 | void 83 | emit32(u32 i) 84 | { 85 | chk_code_size(4); 86 | memcpy(&code[code_pos], &i, 4); 87 | code_pos += 4; 88 | } 89 | 90 | void 91 | emit32at(u32 pos, u32 i) 92 | { 93 | memcpy(&code[pos], &i, 4); 94 | } 95 | 96 | void 97 | emit_prolog() 98 | { 99 | // push ebp 100 | emit8(0x55); 101 | 102 | // mov ebp, esp 103 | emit8(0x89); 104 | emit8(0xe5); 105 | 106 | // sub esp, (26 * 4) 107 | emit8(0x83); 108 | emit8(0xec); 109 | emit8(26 * 4); 110 | } 111 | 112 | void 113 | emit_epilog_common() 114 | { 115 | // mov esp, ebp 116 | emit8(0x89); 117 | emit8(0xec); 118 | 119 | // pop ebp 120 | emit8(0x5d); 121 | 122 | } 123 | 124 | void 125 | emit_epilog() 126 | { 127 | emit_epilog_common(); 128 | 129 | // ret 130 | emit8(0xc3); 131 | } 132 | 133 | void 134 | emit_epilog_final() 135 | { 136 | emit_epilog_common(); 137 | // 138 | // For Windows, main can return by calling "ret" instruction.For Linux, 139 | // main can only exit by invoking sys_exit syscall.Hence we call platform 140 | // specific handler to generate exit code. 141 | // 142 | emit_main_exit(); 143 | } 144 | 145 | u32 146 | emit_var(sym *p) 147 | { 148 | if (p->ident.index >= 0 && p->ident.index <= 25) { 149 | // Local variable. 150 | 151 | // mov ecx, ebp 152 | emit8(0x89); 153 | emit8(0xe9); 154 | 155 | // sub ecx, x 156 | emit8(0x83); 157 | emit8(0xe9); 158 | emit8((p->ident.index + 1) * 4); 159 | } else if (p->ident.index >= 26 && p->ident.index <= 51) { 160 | // Global variable. 
161 | 162 | // mov ecx, data_section_va + offset 163 | emit8(0xb9); 164 | emit32(get_data_va(ei) + ((p->ident.index - 26) * 4)); 165 | } 166 | return CEED_TYPE_ADDR; 167 | } 168 | 169 | u32 170 | emit_const(sym *p) 171 | { 172 | // Mov eax, imm32 173 | emit8(0xb8); 174 | emit32(p->con.value); 175 | return CEED_TYPE_VAL; 176 | } 177 | 178 | void 179 | emit_expr_val(sym *p) 180 | { 181 | if (emit_code(p) == CEED_TYPE_ADDR) { 182 | // mov eax, [ecx] 183 | emit8(0x8b); 184 | emit8(0x1); 185 | } 186 | } 187 | 188 | void 189 | emit_if_else(sym *p) 190 | { 191 | int patchPos = 0, codePosTmp1 = 0; 192 | int i = 0; 193 | // The condition may be a bare variable, which emit_code leaves as an address in ecx; emit_expr_val loads its value into eax for the compare below. 194 | emit_expr_val(p->stmt.sym_list[0]); 195 | 196 | // cmp eax, 0 197 | emit8(0x83); 198 | emit8(0xf8); 199 | emit8(0x0); 200 | 201 | // je 202 | emit8(0xf); 203 | emit8(0x84); 204 | patchPos = code_pos; 205 | emit32(0xdeadbeef); 206 | codePosTmp1 = code_pos; 207 | emit_code(p->stmt.sym_list[1]); 208 | if (p->stmt.sym_count > 2) { 209 | // Account for an extra jmp here that is needed to skip else block. 
210 | // Jmp instruction size is 5 so adding 5 to je; 211 | i = code_pos - codePosTmp1 + 5; 212 | emit32at(patchPos, i); 213 | 214 | // jmp 215 | emit8(0xe9); 216 | patchPos = code_pos; 217 | emit32(0xdeadbeef); 218 | codePosTmp1 = code_pos; 219 | emit_code(p->stmt.sym_list[2]); 220 | i = code_pos - codePosTmp1; 221 | emit32at(patchPos, i); 222 | } else { 223 | i = code_pos - codePosTmp1; 224 | emit32at(patchPos, i); 225 | } 226 | } 227 | 228 | void 229 | emit_arith(sym *p, int sym_list) 230 | { 231 | emit_expr_val(p->stmt.sym_list[1]); 232 | 233 | // push eax 234 | emit8(0x50); 235 | 236 | emit_expr_val(p->stmt.sym_list[0]); 237 | 238 | // pop ebx 239 | emit8(0x5b); 240 | 241 | if (sym_list == CEED_OP_ADD) { 242 | // add eax, ebx 243 | emit8(0x01); 244 | emit8(0xd8); 245 | } else { 246 | // sub eax, ebx 247 | emit8(0x29); 248 | emit8(0xd8); 249 | } 250 | } 251 | 252 | void 253 | emit_logical(sym *p, int sym_list) 254 | { 255 | emit_expr_val(p->stmt.sym_list[1]); 256 | 257 | // Push eax 258 | emit8(0x50); 259 | 260 | emit_expr_val(p->stmt.sym_list[0]); 261 | 262 | // pop ebx 263 | emit8(0x5b); 264 | 265 | // cmp eax, ebx 266 | emit8(0x39); 267 | emit8(0xd8); 268 | 269 | if (sym_list == CEED_OP_GT) { 270 | // jg 0x4 271 | emit8(0x7f); 272 | emit8(0x4); 273 | } else { 274 | // je 0x4 275 | emit8(0x74); 276 | emit8(0x4); 277 | } 278 | 279 | // xor eax, eax 280 | emit8(0x31); 281 | emit8(0xc0); 282 | 283 | // jmp 0x5 284 | emit8(0xeb); 285 | emit8(0x5); 286 | 287 | // mov eax, 1 288 | emit8(0xb8); 289 | emit32(0x1); 290 | } 291 | 292 | void 293 | emit_set_var(sym *p) 294 | { 295 | u8 tmp; 296 | u8 id = p->stmt.sym_list[0]->ident.index; 297 | 298 | // Get value of second operand in eax 299 | emit_expr_val(p->stmt.sym_list[1]); 300 | 301 | // 302 | // Set Local or Global variable based on index. 
303 | // 304 | if (id >= 0 && id <= 25) { 305 | // mov [ebp + tmp], eax 306 | tmp = (0 - ((id + 1) * 4)); 307 | emit8(0x89); 308 | emit8(0x45); 309 | emit8(tmp); 310 | } else if (id >= 26 && id <= 51) { 311 | // mov [data_section_va + offset], eax 312 | emit8(0xa3); 313 | emit32(get_data_va(ei) + ((id - 26) * 4)); 314 | } else { 315 | printf("Error: Unexpected identifier: %d\n", id); 316 | exit(-1); 317 | } 318 | } 319 | 320 | void 321 | emit_func_def(sym *p) 322 | { 323 | u32 fn_id; 324 | 325 | fn_id = p->stmt.sym_list[0]->ident.index - 52; 326 | func[fn_id] = code_pos; 327 | 328 | emit_prolog(); 329 | 330 | if (fn_id == 0) { 331 | // 332 | // Emit any initialization code if needed. We use this to store 333 | // handle to stdin and stdout on Windows. 334 | // 335 | emit_main_init(); 336 | } 337 | 338 | // 339 | // Generate actual function code. 340 | // 341 | emit_code(p->stmt.sym_list[1]); 342 | 343 | if (fn_id == 0) { 344 | emit_epilog_final(); 345 | } else { 346 | emit_epilog(); 347 | } 348 | } 349 | 350 | void 351 | emit_func_call(sym *p) 352 | { 353 | int val = func[p->stmt.sym_list[0]->ident.index - 52]; 354 | 355 | // mov eax, 356 | emit8(0xb8); 357 | emit32(val + get_code_va(ei)); 358 | 359 | // call eax 360 | emit8(0xff); 361 | emit8(0xd0); 362 | } 363 | 364 | void 365 | emit_write_str(sym *p) 366 | { 367 | u32 str_len; 368 | str_len = strlen(rdata + p->con.value); 369 | emit_write(get_rdata_va(ei) + p->con.value, str_len); 370 | // printf("emit_write: %x, %d\n", get_rdata_va(ei) + p->con.value, str_len); 371 | } 372 | 373 | void 374 | emit_write_int() 375 | { 376 | // 377 | // At this point eax contains the number to be converted to string. 
378 | // 379 | 380 | // mov ebx, itoa_buf 381 | emit8(0xbb); 382 | emit32(get_data_va(ei) + CEED_TEMP_ATOI_ITOA_BUF_ADDR); 383 | 384 | // mov ecx, (itoa) 385 | emit8(0xb9); 386 | emit32(get_code_va(ei)); 387 | 388 | // call ecx 389 | emit8(0xff); 390 | emit8(0xd1); 391 | 392 | // 393 | // This returns buffer in ecx and count in edx, which is suitable for 394 | // Linux syscall_write. 395 | // 396 | 397 | emit_write_reg_input(); 398 | } 399 | 400 | void 401 | emit_read_int() 402 | { 403 | // mov [addr], 0 - Zero first byte of buffer to ensure on error, we 404 | // don't convert the string to int with random values. 405 | emit16(0x05c6); 406 | emit32(get_data_va(ei) + CEED_TEMP_ATOI_ITOA_BUF_ADDR); 407 | emit8(0x0); 408 | 409 | emit_read(get_data_va(ei) + CEED_TEMP_ATOI_ITOA_BUF_ADDR, 410 | CEED_TEMP_ATOI_ITOA_BUF_LEN); 411 | 412 | // mov ebx, buf_addr 413 | emit8(0xbb); 414 | emit32(get_data_va(ei) + CEED_TEMP_ATOI_ITOA_BUF_ADDR); 415 | 416 | // mov ecx, (atoi) 417 | emit8(0xb9); 418 | emit32(get_code_va(ei) + 0x80); 419 | 420 | // call ecx 421 | emit8(0xff); 422 | emit8(0xd1); 423 | } 424 | 425 | u32 426 | emit_code(sym *p) 427 | { 428 | if (p == NULL) 429 | return 0; 430 | 431 | switch(p->type) { 432 | 433 | case sym_typ_const: { 434 | return emit_const(p); 435 | } 436 | 437 | case sym_typ_ident: { 438 | return emit_var(p); 439 | } 440 | 441 | case sym_typ_stmt: { 442 | 443 | switch(p->stmt.type) { 444 | 445 | case FN_DEF: { 446 | emit_func_def(p); 447 | break; 448 | } 449 | 450 | case FN_CALL: { 451 | emit_func_call(p); 452 | break; 453 | } 454 | 455 | case LOOP: { 456 | // 457 | // This is left as an exercise for the reader. 
458 | // 459 | printf("Error: Loop support is not implemented.\n"); 460 | exit(-1); 461 | } 462 | 463 | case IF: { 464 | emit_if_else(p); 465 | break; 466 | } 467 | 468 | case WRITE_STR: { 469 | sym *s = p->stmt.sym_list[0]; 470 | emit_write_str(s); 471 | break; 472 | } 473 | 474 | case WRITE_INT: { 475 | emit_expr_val(p->stmt.sym_list[0]); 476 | emit_write_int(); 477 | break; 478 | } 479 | 480 | case WRITE_NEWLINE: { 481 | emit_write(get_rdata_va(ei), 1); 482 | break; 483 | } 484 | 485 | case READ_INT: { 486 | emit_read_int(); 487 | break; 488 | } 489 | 490 | case ';': { 491 | emit_code(p->stmt.sym_list[0]); 492 | emit_code(p->stmt.sym_list[1]); 493 | break; 494 | } 495 | 496 | case '=': { 497 | emit_set_var(p); 498 | break; 499 | } 500 | 501 | case '+': { 502 | emit_arith(p, CEED_OP_ADD); 503 | break; 504 | } 505 | 506 | case '-': { 507 | emit_arith(p, CEED_OP_SUB); 508 | break; 509 | } 510 | 511 | case '>': { 512 | emit_logical(p, CEED_OP_GT); 513 | break; 514 | } 515 | 516 | case EQ: { 517 | emit_logical(p, CEED_OP_EQ); 518 | break; 519 | } 520 | } 521 | } 522 | } 523 | 524 | return 0; 525 | } 526 | 527 | 528 | u32 529 | add_str(pu8 str) 530 | { 531 | u32 str_len; 532 | u32 ret_pos; 533 | 534 | str_len = strlen(str); 535 | str[str_len - 1] = '\0'; // Remove terminating " 536 | if ((rdata_pos + str_len) > CEED_MAX_RDATA_SIZE) { 537 | printf("Error: Exceeded maximum RDATA size of %d bytes.\n", 538 | CEED_MAX_RDATA_SIZE); 539 | exit(-1); 540 | } 541 | strcpy(&rdata[rdata_pos], str); 542 | ret_pos = rdata_pos; 543 | rdata_pos += str_len; 544 | return ret_pos; 545 | } 546 | 547 | void 548 | gen_exe() 549 | { 550 | set_exe_code_scn(ei, code, code_pos); 551 | set_exe_rdata_scn(ei, rdata, rdata_pos); 552 | gen_exe_file(ei); 553 | } 554 | 555 | void 556 | cmplr_init() 557 | { 558 | code = malloc(CEED_MAX_CODE_SIZE); 559 | rdata = malloc(CEED_MAX_RDATA_SIZE); 560 | 561 | if (code == NULL || rdata == NULL) { 562 | printf("Insufficient memory.\n"); 563 | exit(-1); 564 | } 
565 | 566 | memset(code, 0, CEED_MAX_CODE_SIZE); 567 | memset(rdata, 0, CEED_MAX_RDATA_SIZE); 568 | 569 | // 570 | // Create a new line string as it is needed to print new lines. 571 | // 572 | rdata[0] = '\n'; 573 | rdata_pos = 2; 574 | 575 | 576 | // TODO: HACK - Get actual size. 577 | memcpy(code, itoa_code, 0x80); 578 | code_pos += 0x80; 579 | memcpy(&code[code_pos], atoi_code, 0x80); 580 | code_pos += 0x80; 581 | } 582 | 583 | -------------------------------------------------------------------------------- /src/ceedelf.c: -------------------------------------------------------------------------------- 1 | /*++ 2 | 3 | Copyright (c) 2017, Pankaj Garg 4 | All rights reserved. 5 | 6 | This software may be modified and distributed under the terms of the BSD 7 | license. See the LICENSE file for details. 8 | 9 | --*/ 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "ceed.h" 16 | 17 | // 18 | // Standard ELF definitions. 19 | // 20 | 21 | #define EI_NIDENT 16 22 | 23 | typedef struct { 24 | u8 e_ident[EI_NIDENT]; 25 | u16 e_type; 26 | u16 e_machine; 27 | u32 e_version; 28 | u32 e_entry; 29 | u32 e_phoff; 30 | u32 e_shoff; 31 | u32 e_flags; 32 | u16 e_ehsize; 33 | u16 e_phentsize; 34 | u16 e_phnum; 35 | u16 e_shentsize; 36 | u16 e_shnum; 37 | u16 e_shstrndx; 38 | } Elf32_Ehdr; 39 | 40 | #define EH_SIZE 52 41 | #define EI_MAG0 0x7F 42 | #define EI_MAG1 'E' 43 | #define EI_MAG2 'L' 44 | #define EI_MAG3 'F' 45 | #define ELFCLASS32 0x1 46 | #define ELFDATA2LSB 0x1 47 | #define EV_CURRENT 0x1 48 | #define ELFOSABI_NONE 0x0 49 | #define ET_EXEC 0x2 50 | #define EM_386 0x3 51 | 52 | typedef struct { 53 | u32 p_type; 54 | u32 p_offset; 55 | u32 p_vaddr; 56 | u32 p_paddr; 57 | u32 p_filesz; 58 | u32 p_memsz; 59 | u32 p_flags; 60 | u32 p_align; 61 | } Elf32_Phdr; 62 | 63 | #define PH_SIZE 32 64 | #define PT_LOAD 0x1 65 | #define PF_X 0x1 66 | #define PF_W 0x2 67 | #define PF_R 0x4 68 | 69 | #define SH_SIZE 40 70 | 71 | // 72 | // ceed specific 
definitions and code. 73 | // 74 | 75 | #define CEED_FILE_ALIGN 0x1000 76 | #define CEED_SECTION_ALIGN 0x1000 77 | 78 | // 79 | // We reserve 1MB (0x100000) for each section to avoid dynamically calculating 80 | // subsequent section VA. This avoids any code patching requirements in our 81 | // compiler. More details on this can be found in ceedpe.c 82 | // 83 | #define CEED_DATA_SECTION_VA 0x600000 // 1MB 84 | #define CEED_CODE_SECTION_VA 0x700000 // 1MB 85 | #define CEED_RDATA_SECTION_VA 0x800000 // 1MB 86 | 87 | #define CEED_MAX_SECTION_COUNT 0x8 88 | 89 | typedef struct _section_info 90 | { 91 | Elf32_Phdr scn_hdr; 92 | pu8 scn_data; 93 | u32 scn_size; 94 | u32 scn_file_size; 95 | u32 scn_file_offset; 96 | u32 scn_virtual_size; 97 | } section_info, *psection_info; 98 | 99 | typedef struct _exe_info 100 | { 101 | psection_info data_scn; 102 | psection_info code_scn; 103 | psection_info rdata_scn; 104 | 105 | Elf32_Ehdr eh; 106 | u32 scn_count; 107 | psection_info scn_list[CEED_MAX_SECTION_COUNT]; 108 | } exe_info, *pexe_info; 109 | 110 | psection_info 111 | elf_new_section(pexe_info ei, u32 scn_va, u32 attr) 112 | { 113 | psection_info si = malloc(sizeof(section_info)); 114 | memset(si, 0, sizeof(section_info)); 115 | si->scn_hdr.p_type = PT_LOAD; 116 | si->scn_hdr.p_flags = attr; 117 | si->scn_hdr.p_vaddr = scn_va; 118 | si->scn_hdr.p_align = CEED_SECTION_ALIGN; 119 | ei->scn_list[ei->scn_count++] = si; 120 | return si; 121 | } 122 | 123 | pvoid 124 | elf_alloc_exe_info() 125 | { 126 | pexe_info ei = malloc(sizeof(exe_info)); 127 | memset(ei, 0, sizeof(exe_info)); 128 | 129 | ei->data_scn = elf_new_section(ei, CEED_DATA_SECTION_VA, PF_R | PF_W); 130 | ei->code_scn = elf_new_section(ei, CEED_CODE_SECTION_VA, PF_X | PF_R); 131 | ei->rdata_scn = elf_new_section(ei, CEED_RDATA_SECTION_VA, PF_R); 132 | 133 | return ei; 134 | } 135 | 136 | pvoid 137 | elf_set_scn(psection_info si, pu8 scn_data, u32 scn_size) 138 | { 139 | si->scn_data = scn_data; 140 | si->scn_size = 
scn_size; 141 | si->scn_virtual_size = round_up(scn_size, CEED_SECTION_ALIGN); 142 | si->scn_file_size = round_up(scn_size, CEED_FILE_ALIGN); 143 | 144 | si->scn_hdr.p_memsz = si->scn_virtual_size; 145 | si->scn_hdr.p_filesz = si->scn_file_size; 146 | return NULL; 147 | } 148 | 149 | pvoid 150 | elf_set_exe_code_scn(pvoid einfo, pu8 scn_data, u32 scn_size) 151 | { 152 | pexe_info ei = einfo; 153 | return elf_set_scn(ei->code_scn, scn_data, scn_size); 154 | } 155 | 156 | pvoid 157 | elf_set_exe_data_scn(pvoid einfo, pu8 scn_data, u32 scn_size) 158 | { 159 | pexe_info ei = einfo; 160 | return elf_set_scn(ei->data_scn, scn_data, scn_size); 161 | } 162 | 163 | pvoid 164 | elf_set_exe_rdata_scn(pvoid einfo, pu8 scn_data, u32 scn_size) 165 | { 166 | pexe_info ei = einfo; 167 | return elf_set_scn(ei->rdata_scn, scn_data, scn_size); 168 | } 169 | 170 | u32 171 | elf_get_code_va(pvoid einfo) 172 | { 173 | return CEED_CODE_SECTION_VA; 174 | } 175 | 176 | u32 177 | elf_get_data_va(pvoid einfo) 178 | { 179 | return CEED_DATA_SECTION_VA; 180 | } 181 | 182 | u32 183 | elf_get_rdata_va(pvoid einfo) 184 | { 185 | return CEED_RDATA_SECTION_VA; 186 | } 187 | 188 | void 189 | elf_write_hdr(pexe_info ei, FILE *file) 190 | { 191 | u32 i; 192 | 193 | i = 0; 194 | ei->eh.e_ident[i++] = EI_MAG0; 195 | ei->eh.e_ident[i++] = EI_MAG1; 196 | ei->eh.e_ident[i++] = EI_MAG2; 197 | ei->eh.e_ident[i++] = EI_MAG3; 198 | ei->eh.e_ident[i++] = ELFCLASS32; // EI_CLASS 199 | ei->eh.e_ident[i++] = ELFDATA2LSB; // EI_DATA 200 | ei->eh.e_ident[i++] = EV_CURRENT; // EI_VERSION 201 | ei->eh.e_ident[i++] = ELFOSABI_NONE; // EI_OSABI 202 | 203 | ei->eh.e_type = ET_EXEC; 204 | ei->eh.e_machine = EM_386; 205 | ei->eh.e_version = EV_CURRENT; 206 | 207 | ei->eh.e_phoff = EH_SIZE; 208 | ei->eh.e_ehsize = EH_SIZE; 209 | ei->eh.e_phentsize = PH_SIZE; 210 | ei->eh.e_shentsize = SH_SIZE; 211 | 212 | ei->eh.e_phnum = ei->scn_count; 213 | ei->eh.e_entry = CEED_CODE_SECTION_VA + func[0]; 214 | 215 | fwrite(&ei->eh, 
sizeof(Elf32_Ehdr), 1, file); 216 | } 217 | 218 | void 219 | elf_write_prg_hdrs(pexe_info ei, FILE *file) 220 | { 221 | u32 hdr_size; 222 | u32 cur_offset; 223 | 224 | hdr_size = EH_SIZE + (PH_SIZE * ei->scn_count); 225 | cur_offset = hdr_size; 226 | for (int i = 0; i < ei->scn_count; i++) 227 | { 228 | psection_info si = ei->scn_list[i]; 229 | cur_offset = round_up(cur_offset, CEED_SECTION_ALIGN); 230 | si->scn_hdr.p_offset = cur_offset; 231 | cur_offset += si->scn_virtual_size; 232 | fwrite(&si->scn_hdr, sizeof(Elf32_Phdr), 1, file); 233 | } 234 | } 235 | 236 | void 237 | elf_write_section_data(psection_info si, FILE *file) 238 | { 239 | fseek(file, si->scn_hdr.p_offset, SEEK_SET); 240 | fwrite(si->scn_data, si->scn_file_size, 1, file); 241 | } 242 | 243 | u8 data_buffer[4096]; 244 | #define CEED_MAX_VARIABLES 26 245 | void 246 | elf_gen_data_section(pexe_info ei) 247 | { 248 | // 249 | // Only 26 global variables of 4-byte int size are supported. 250 | // 251 | elf_set_exe_data_scn(ei, data_buffer, (CEED_MAX_VARIABLES * 4)); 252 | } 253 | 254 | void 255 | elf_gen_exe_file(pvoid einfo) 256 | { 257 | pexe_info ei = einfo; 258 | FILE *exe_file; 259 | 260 | if (func[0] == -1) 261 | { 262 | printf("Entry point function '_a' not found.\n"); 263 | exit(-1); 264 | } 265 | 266 | exe_file = fopen("a.out", "wb+"); 267 | if (exe_file == NULL) 268 | { 269 | printf("Failed to create output file (a.exe).\n"); 270 | exit(errno); 271 | } 272 | 273 | elf_gen_data_section(ei); 274 | elf_write_hdr(ei, exe_file); 275 | elf_write_prg_hdrs(ei, exe_file); 276 | for (int i = 0; i < ei->scn_count; i++) 277 | { 278 | psection_info si = ei->scn_list[i]; 279 | elf_write_section_data(si, exe_file); 280 | } 281 | 282 | fclose(exe_file); 283 | } 284 | 285 | void 286 | elf_emit_main_init() 287 | { 288 | } 289 | 290 | void 291 | elf_emit_main_exit() 292 | { 293 | // mov ebx, eax // set return code 294 | emit8(0x89); 295 | emit8(0xc3); 296 | 297 | // mov eax, 1 // syscall_exit 298 | emit8(0xb8); 
299 | emit32(0x1); 300 | 301 | // int 0x80 // invoke syscall 302 | emit8(0xcd); 303 | emit8(0x80); 304 | } 305 | 306 | void 307 | elf_emit_write_reg_input() 308 | { 309 | // mov ebx, 1 (stdout) 310 | emit8(0xbb); 311 | emit32(0x1); 312 | 313 | // mov eax, 4 (syscall_write) 314 | emit8(0xb8); 315 | emit32(0x4); 316 | 317 | // int 0x80 318 | emit8(0xcd); 319 | emit8(0x80); 320 | } 321 | 322 | void 323 | elf_emit_write(u32 buf_addr, u32 buf_len) 324 | { 325 | // mov edx, str_len 326 | emit8(0xba); 327 | emit32(buf_len); 328 | 329 | // mov ecx, str_addr 330 | emit8(0xb9); 331 | // emit32(0x9049000 + p->con.value); 332 | emit32(buf_addr); 333 | 334 | elf_emit_write_reg_input(); 335 | } 336 | 337 | void 338 | elf_emit_read(u32 buf_addr, u32 buf_len) 339 | { 340 | // mov edx, buf_len 341 | emit8(0xba); 342 | emit32(buf_len); 343 | 344 | // mov ecx, buf_addr 345 | emit8(0xb9); 346 | emit32(buf_addr); 347 | 348 | // mov ebx, 0 (stdin) 349 | emit8(0xbb); 350 | emit32(0x0); 351 | 352 | // mov eax, 3 (syscall_read) 353 | emit8(0xb8); 354 | emit32(0x3); 355 | 356 | // int 0x80 357 | emit8(0xcd); 358 | emit8(0x80); 359 | 360 | } 361 | 362 | void 363 | elf_init() 364 | { 365 | ei = elf_alloc_exe_info(); 366 | gen_exe_file = elf_gen_exe_file; 367 | set_exe_code_scn = elf_set_exe_code_scn; 368 | set_exe_rdata_scn = elf_set_exe_rdata_scn; 369 | get_code_va = elf_get_code_va; 370 | get_data_va = elf_get_data_va; 371 | get_rdata_va = elf_get_rdata_va; 372 | emit_main_init = elf_emit_main_init; 373 | emit_main_exit = elf_emit_main_exit; 374 | emit_write = elf_emit_write; 375 | emit_write_reg_input = elf_emit_write_reg_input; 376 | emit_read = elf_emit_read; 377 | } 378 | 379 | -------------------------------------------------------------------------------- /src/ceedpe.c: -------------------------------------------------------------------------------- 1 | /*++ 2 | 3 | Copyright (c) 2017, Pankaj Garg 4 | All rights reserved. 
5 | 6 | This software may be modified and distributed under the terms of the BSD 7 | license. See the LICENSE file for details. 8 | 9 | --*/ 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "ceed.h" 17 | 18 | // 19 | // Standard ELF definitions. 20 | // 21 | 22 | #define IMAGE_NT_SIGNATURE 0x00004550 23 | #define IMAGE_NT_OPTIONAL_HDR32_MAGIC 0x10b 24 | #define IMAGE_FILE_MACHINE_I386 0x014c 25 | #define IMAGE_NUMBEROF_DIRECTORY_ENTRIES 16 26 | #define IMAGE_SUBSYSTEM_WINDOWS_CUI 3 27 | #define IMAGE_DIRECTORY_ENTRY_IMPORT 1 28 | #define IMAGE_FILE_EXECUTABLE_IMAGE 0x0002 29 | #define IMAGE_FILE_32BIT_MACHINE 0x0100 30 | #define IMAGE_SCN_MEM_EXECUTE 0x20000000 31 | #define IMAGE_SCN_MEM_READ 0x40000000 32 | #define IMAGE_SCN_MEM_WRITE 0x80000000 33 | #define IMAGE_SIZEOF_SHORT_NAME 8 34 | 35 | #pragma pack (push, 1) 36 | 37 | typedef struct _IMAGE_FILE_HEADER { 38 | u16 Machine; 39 | u16 NumberOfSections; 40 | u32 TimeDateStamp; 41 | u32 PointerToSymbolTable; 42 | u32 NumberOfSymbols; 43 | u16 SizeOfOptionalHeader; 44 | u16 Characteristics; 45 | } IMAGE_FILE_HEADER, *PIMAGE_FILE_HEADER; 46 | 47 | typedef struct _IMAGE_DATA_DIRECTORY { 48 | u32 VirtualAddress; 49 | u32 Size; 50 | } IMAGE_DATA_DIRECTORY, *PIMAGE_DATA_DIRECTORY; 51 | 52 | typedef struct _IMAGE_OPTIONAL_HEADER { 53 | u16 Magic; 54 | u8 MajorLinkerVersion; 55 | u8 MinorLinkerVersion; 56 | u32 SizeOfCode; 57 | u32 SizeOfInitializedData; 58 | u32 SizeOfUninitializedData; 59 | u32 AddressOfEntryPoint; 60 | u32 BaseOfCode; 61 | u32 BaseOfData; 62 | 63 | u32 ImageBase; 64 | u32 SectionAlignment; 65 | u32 FileAlignment; 66 | u16 MajorOperatingSystemVersion; 67 | u16 MinorOperatingSystemVersion; 68 | u16 MajorImageVersion; 69 | u16 MinorImageVersion; 70 | u16 MajorSubsystemVersion; 71 | u16 MinorSubsystemVersion; 72 | u32 Win32VersionValue; 73 | u32 SizeOfImage; 74 | u32 SizeOfHeaders; 75 | u32 CheckSum; 76 | u16 Subsystem; 77 | u16 DllCharacteristics; 78 | u32 
SizeOfStackReserve; 79 | u32 SizeOfStackCommit; 80 | u32 SizeOfHeapReserve; 81 | u32 SizeOfHeapCommit; 82 | u32 LoaderFlags; 83 | u32 NumberOfRvaAndSizes; 84 | IMAGE_DATA_DIRECTORY DataDirectory[IMAGE_NUMBEROF_DIRECTORY_ENTRIES]; 85 | } IMAGE_OPTIONAL_HEADER32, *PIMAGE_OPTIONAL_HEADER32; 86 | 87 | typedef struct _IMAGE_SECTION_HEADER { 88 | s8 Name[IMAGE_SIZEOF_SHORT_NAME]; 89 | union { 90 | u32 PhysicalAddress; 91 | u32 VirtualSize; 92 | } Misc; 93 | u32 VirtualAddress; 94 | u32 SizeOfRawData; 95 | u32 PointerToRawData; 96 | u32 PointerToRelocations; 97 | u32 PointerToLinenumbers; 98 | u16 NumberOfRelocations; 99 | u16 NumberOfLinenumbers; 100 | u32 Characteristics; 101 | } IMAGE_SECTION_HEADER, *PIMAGE_SECTION_HEADER; 102 | 103 | typedef struct _IMAGE_IMPORT_BY_NAME { 104 | u16 Hint; 105 | s8 Name[1]; 106 | } IMAGE_IMPORT_BY_NAME, *PIMAGE_IMPORT_BY_NAME; 107 | 108 | typedef struct _IMAGE_THUNK_DATA32 { 109 | union { 110 | u32 ForwarderString; 111 | u32 Function; 112 | u32 Ordinal; 113 | u32 AddressOfData; 114 | }; 115 | } IMAGE_THUNK_DATA32; 116 | typedef IMAGE_THUNK_DATA32 * PIMAGE_THUNK_DATA32; 117 | 118 | typedef struct _IMAGE_IMPORT_DESCRIPTOR { 119 | union { 120 | u32 Characteristics; 121 | u32 OriginalFirstThunk; 122 | } ; 123 | u32 TimeDateStamp; 124 | u32 ForwarderChain; 125 | u32 Name; 126 | u32 FirstThunk; 127 | } IMAGE_IMPORT_DESCRIPTOR; 128 | typedef IMAGE_IMPORT_DESCRIPTOR *PIMAGE_IMPORT_DESCRIPTOR; 129 | 130 | unsigned char dos_hdr[128] = { 131 | 0x4D, 0x5A, 0x90, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00, 0x00, 132 | 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 133 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 134 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 135 | 0x0E, 0x1F, 0xBA, 0x0E, 0x00, 0xB4, 0x09, 0xCD, 0x21, 0xB8, 0x01, 0x4C, 0xCD, 0x21, 0x54, 0x68, 136 | 
0x69, 0x73, 0x20, 0x70, 0x72, 0x6F, 0x67, 0x72, 0x61, 0x6D, 0x20, 0x63, 0x61, 0x6E, 0x6E, 0x6F, 137 | 0x74, 0x20, 0x62, 0x65, 0x20, 0x72, 0x75, 0x6E, 0x20, 0x69, 0x6E, 0x20, 0x44, 0x4F, 0x53, 0x20, 138 | 0x6D, 0x6F, 0x64, 0x65, 0x2E, 0x0D, 0x0D, 0x0A, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 139 | }; 140 | #pragma pack(pop) 141 | 142 | // 143 | // ceed specific definitions and code. 144 | // 145 | 146 | // 147 | // We are using hardcoded section offset. The reason for this is simplicity. 148 | // 149 | // A code that is accessing a gloal variable or constant needs to know its 150 | // virtual address. Typically this requires two passes: 151 | // 152 | // First pass to generate code (as code generation is needed to calculate 153 | // section size, which is then used to calculate offset for each section and 154 | // using section offset, VA of global variables and constants is calculated. 155 | // 156 | // Second pass to patch the code that was generated without actually knowing 157 | // the offsets. 158 | // 159 | // By assuming fixed offsets, we know VA of each section ahead of time and 160 | // generate code in one pass without need for patching later. This makes code 161 | // simpler. One limitation of this scheme is that each section is limited to 162 | // 16MB, which is acceptable for this compiler as it is only for educational 163 | // purposes. 164 | // 165 | #define CEED_FILE_ALIGN 0x200 // 512B 166 | #define CEED_SECTION_ALIGN 0x100000 // 16MB 167 | 168 | #define CEED_IMAGE_BASE_VA 0x400000 169 | #define CEED_IMPORT_SECTION_RVA (CEED_SECTION_ALIGN * 1) 170 | #define CEED_DATA_SECTION_RVA (CEED_SECTION_ALIGN * 2) 171 | #define CEED_CODE_SECTION_RVA (CEED_SECTION_ALIGN * 3) 172 | #define CEED_RDATA_SECTION_RVA (CEED_SECTION_ALIGN * 4) 173 | 174 | 175 | // 176 | // We use bottom 2K of DATA section for any system state outside of user's 177 | // code. 
For example, to support read/write from console, we store handle to 178 | // stdin and stdout during initialization instead of only each read/write call. 179 | // 180 | #define CEED_STDIN_HANDLE_RVA 0x800 181 | #define CEED_STDOUT_HANDLE_RVA 0x804 182 | 183 | // 184 | // Reserve some temp space needs to invoke certain windows API. 185 | // 186 | #define CEED_TEMP_U32_1 0xf00 187 | 188 | // 189 | // Stores offset of k32 functions & array. This array would contain address of 190 | // functions imported from kernel32 after the exe and dlls are loaded by 191 | // windows loader. 192 | // 193 | u32 k32_fn_array; 194 | u32 fn_GetStdHandle; 195 | u32 fn_ReadConsoleA; 196 | u32 fn_WriteConsoleA; 197 | 198 | typedef struct _section_info 199 | { 200 | IMAGE_SECTION_HEADER scn_hdr; 201 | pu8 scn_data; 202 | u32 scn_size; 203 | u32 scn_file_size; 204 | u32 scn_file_offset; 205 | u32 scn_virtual_size; 206 | } section_info, *psection_info; 207 | 208 | typedef struct _exe_info 209 | { 210 | psection_info import_scn; 211 | psection_info data_scn; 212 | psection_info code_scn; 213 | psection_info rdata_scn; 214 | 215 | IMAGE_FILE_HEADER fh; 216 | IMAGE_OPTIONAL_HEADER32 oh; 217 | u32 scn_count; 218 | psection_info scn_list[8]; 219 | } exe_info, *pexe_info; 220 | 221 | 222 | psection_info 223 | pe_new_section(pexe_info ei, const char *name, u32 scn_va, u32 attr) 224 | { 225 | psection_info si = malloc(sizeof(section_info)); 226 | memset(si, 0, sizeof(section_info)); 227 | strcpy(si->scn_hdr.Name, name); 228 | si->scn_hdr.Characteristics = attr; 229 | si->scn_hdr.VirtualAddress = scn_va; 230 | ei->scn_list[ei->scn_count++] = si; 231 | return si; 232 | } 233 | 234 | pvoid 235 | pe_alloc_exe_info() 236 | { 237 | pexe_info ei = malloc(sizeof(exe_info)); 238 | memset(ei, 0, sizeof(exe_info)); 239 | 240 | ei->import_scn = pe_new_section(ei, ".idata", CEED_IMPORT_SECTION_RVA, 241 | IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE); 242 | 243 | ei->data_scn = pe_new_section(ei, ".data", 
CEED_DATA_SECTION_RVA, 244 | IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE); 245 | 246 | ei->code_scn = pe_new_section(ei, ".text", CEED_CODE_SECTION_RVA, 247 | IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_MEM_READ); 248 | 249 | ei->rdata_scn = pe_new_section(ei, ".rdata", CEED_RDATA_SECTION_RVA, 250 | IMAGE_SCN_MEM_READ); 251 | 252 | return ei; 253 | } 254 | 255 | pvoid 256 | pe_set_scn(psection_info si, pu8 scn_data, u32 scn_size) 257 | { 258 | si->scn_data = scn_data; 259 | si->scn_size = scn_size; 260 | si->scn_virtual_size = scn_size; 261 | si->scn_file_size = round_up(scn_size, CEED_FILE_ALIGN); 262 | 263 | si->scn_hdr.Misc.VirtualSize = si->scn_virtual_size; 264 | si->scn_hdr.SizeOfRawData = si->scn_file_size; 265 | return NULL; 266 | } 267 | 268 | pvoid 269 | pe_set_exe_code_scn(pvoid einfo, pu8 scn_data, u32 scn_size) 270 | { 271 | pexe_info ei = einfo; 272 | return pe_set_scn(ei->code_scn, scn_data, scn_size); 273 | } 274 | 275 | pvoid 276 | pe_set_exe_data_scn(pvoid einfo, pu8 scn_data, u32 scn_size) 277 | { 278 | pexe_info ei = einfo; 279 | return pe_set_scn(ei->data_scn, scn_data, scn_size); 280 | } 281 | 282 | pvoid 283 | pe_set_exe_rdata_scn(pvoid einfo, pu8 scn_data, u32 scn_size) 284 | { 285 | pexe_info ei = einfo; 286 | return pe_set_scn(ei->rdata_scn, scn_data, scn_size); 287 | } 288 | 289 | pvoid 290 | pe_set_exe_import_scn(pvoid einfo, pu8 scn_data, u32 scn_size) 291 | { 292 | pexe_info ei = einfo; 293 | return pe_set_scn(ei->import_scn, scn_data, scn_size); 294 | } 295 | 296 | u32 297 | pe_get_code_va(pvoid einfo) 298 | { 299 | return CEED_IMAGE_BASE_VA + CEED_CODE_SECTION_RVA; 300 | } 301 | 302 | u32 303 | pe_get_data_va(pvoid einfo) 304 | { 305 | return CEED_IMAGE_BASE_VA + CEED_DATA_SECTION_RVA; 306 | } 307 | 308 | u32 309 | pe_get_rdata_va(pvoid einfo) 310 | { 311 | return CEED_IMAGE_BASE_VA + CEED_RDATA_SECTION_RVA; 312 | } 313 | 314 | void 315 | pe_write_fixed_hdrs(pexe_info ei, FILE *file) 316 | { 317 | u32 nt_sig = IMAGE_NT_SIGNATURE; 318 | 
IMAGE_FILE_HEADER fh = { 0 }; 319 | 320 | fh.Machine = IMAGE_FILE_MACHINE_I386; 321 | fh.NumberOfSections = ei->scn_count; 322 | fh.TimeDateStamp = 0; 323 | fh.PointerToSymbolTable = 0; 324 | fh.NumberOfSymbols = 0; 325 | fh.SizeOfOptionalHeader = sizeof(IMAGE_OPTIONAL_HEADER32); 326 | fh.Characteristics = (IMAGE_FILE_EXECUTABLE_IMAGE | IMAGE_FILE_32BIT_MACHINE); 327 | 328 | fwrite(dos_hdr, sizeof(dos_hdr), 1, file); 329 | fwrite(&nt_sig, sizeof(nt_sig), 1, file); 330 | fwrite(&fh, sizeof(fh), 1, file); 331 | } 332 | 333 | 334 | void 335 | pe_write_optional_hdr(pexe_info ei, FILE *file) 336 | { 337 | IMAGE_OPTIONAL_HEADER32 oh = { 0 }; 338 | u32 hdr_size; 339 | 340 | oh.Magic = IMAGE_NT_OPTIONAL_HDR32_MAGIC; 341 | oh.MajorLinkerVersion = 0; 342 | oh.MinorLinkerVersion = 1; 343 | oh.SizeOfCode = 8; 344 | oh.SizeOfInitializedData = 0; 345 | oh.SizeOfUninitializedData = 0; 346 | oh.AddressOfEntryPoint = 0; // Fixed later. 347 | oh.BaseOfCode = 0; // Fixed later. 348 | oh.ImageBase = 0x400000; 349 | oh.SectionAlignment = CEED_SECTION_ALIGN; 350 | oh.FileAlignment = CEED_FILE_ALIGN; 351 | oh.MajorOperatingSystemVersion = 4; 352 | oh.MinorOperatingSystemVersion = 0; 353 | oh.MajorImageVersion = 0; 354 | oh.MinorImageVersion = 0; 355 | oh.MajorSubsystemVersion = 4; 356 | oh.MinorSubsystemVersion = 0; 357 | oh.Win32VersionValue = 0; 358 | oh.SizeOfImage = 0; // Fixed later. 359 | oh.SizeOfHeaders = 0; // Fixed later. 360 | oh.CheckSum = 0x1D68; 361 | oh.Subsystem = IMAGE_SUBSYSTEM_WINDOWS_CUI; 362 | oh.DllCharacteristics = 0; 363 | oh.SizeOfStackReserve = 0x100000; 364 | oh.SizeOfStackCommit = 0x1000; 365 | oh.SizeOfHeapReserve = 0x100000; 366 | oh.SizeOfHeapCommit = 0x1000; 367 | oh.LoaderFlags = 0; 368 | oh.NumberOfRvaAndSizes = 16; 369 | // Leave all DataDirectory as 0. 
370 | 371 | oh.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress = CEED_IMPORT_SECTION_RVA; 372 | oh.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].Size = ei->import_scn->scn_file_size; 373 | 374 | oh.AddressOfEntryPoint = CEED_CODE_SECTION_RVA + func[0]; 375 | oh.BaseOfCode = CEED_CODE_SECTION_RVA; 376 | 377 | // 378 | // This needs to be calculated to be the actual size in memory of the 379 | // image. So all section's virtual size. 380 | // 381 | hdr_size = sizeof(dos_hdr) + sizeof(u32) + sizeof(IMAGE_FILE_HEADER) + 382 | sizeof(IMAGE_OPTIONAL_HEADER32) + 383 | (sizeof(IMAGE_SECTION_HEADER) * ei->scn_count); 384 | oh.SizeOfImage = round_up(hdr_size, CEED_SECTION_ALIGN); 385 | for (int i = 0; i < ei->scn_count; i++) 386 | { 387 | oh.SizeOfImage += round_up(ei->scn_list[i]->scn_virtual_size, 388 | CEED_SECTION_ALIGN); 389 | } 390 | oh.SizeOfHeaders = hdr_size; 391 | 392 | fwrite(&oh, sizeof(oh), 1, file); 393 | } 394 | 395 | void 396 | pe_write_section_hdr(psection_info si, FILE *file) 397 | { 398 | fwrite(&si->scn_hdr, sizeof(si->scn_hdr), 1, file); 399 | } 400 | 401 | void 402 | pe_write_section_data(psection_info si, u32 file_offset, FILE *file) 403 | { 404 | fseek(file, file_offset, SEEK_SET); 405 | fwrite(si->scn_data, si->scn_file_size, 1, file); 406 | } 407 | 408 | u8 data_buffer[4096]; 409 | #define CEED_MAX_VARIABLES 26 410 | void 411 | pe_gen_data_section(pexe_info ei) 412 | { 413 | // 414 | // Only 26 global variables of 4-byte int size are supported. 
415 | // 416 | pe_set_exe_data_scn(ei, data_buffer, (CEED_MAX_VARIABLES * 4)); 417 | } 418 | 419 | 420 | #define MAX_FUNCTIONS_IMPORT_PER_DLL 8 421 | #define c_assert(e) typedef char __c_assert__[(e)?1:-1] 422 | 423 | u8 import_buffer[4096]; 424 | void 425 | pe_gen_import_section(pexe_info ei) 426 | { 427 | pu8 import = import_buffer; 428 | IMAGE_IMPORT_DESCRIPTOR *iid; 429 | IMAGE_THUNK_DATA32 *th; 430 | IMAGE_THUNK_DATA32 *oth; 431 | IMAGE_IMPORT_BY_NAME *iin; 432 | const char *dll_names[] = { "kernel32.dll", "ntdll.dll" }; 433 | const char *fn_names[][10] = { 434 | { "WriteConsoleA", "ReadConsoleA", "GetStdHandle", 0 }, 435 | { "NtReadFile", 0 }, 436 | }; 437 | 438 | int dll_count = sizeof(dll_names) / sizeof(dll_names[0]); 439 | 440 | c_assert((sizeof(dll_names) / sizeof(dll_names[0])) == 441 | (sizeof(fn_names) / sizeof(fn_names[0]))); 442 | 443 | // 444 | // Add +1 in dll_count to add a NULL entry as dll import array is NULL 445 | // terminated. 446 | // 447 | u32 offset = (dll_count + 1) * sizeof(IMAGE_IMPORT_DESCRIPTOR); 448 | 449 | iid = (IMAGE_IMPORT_DESCRIPTOR *)import; 450 | 451 | for (int i = 0; i < dll_count; i++) { 452 | u32 thunk_count = 0; 453 | pu8 name = import + offset; 454 | u32 th_size; 455 | 456 | iid[i].Name = offset + CEED_IMPORT_SECTION_RVA; 457 | strcpy(name, dll_names[i]); 458 | offset += strlen(name) + 1; 459 | 460 | for (int j = 0; j < MAX_FUNCTIONS_IMPORT_PER_DLL; j++) { 461 | if (fn_names[i][j] == NULL) { 462 | break; 463 | } 464 | thunk_count++; 465 | } 466 | 467 | // 468 | // Add +1 in thunk_count to add a NULL entry as thunk array is NULL 469 | // terminated. 
470 | // 471 | th_size = (thunk_count + 1) * sizeof(IMAGE_THUNK_DATA32); 472 | iid[i].OriginalFirstThunk = offset + CEED_IMPORT_SECTION_RVA; 473 | iid[i].FirstThunk = offset + th_size + CEED_IMPORT_SECTION_RVA; 474 | if (strcmp(dll_names[i], "kernel32.dll") == 0) { 475 | k32_fn_array = iid[i].FirstThunk + CEED_IMAGE_BASE_VA; 476 | } 477 | oth = (IMAGE_THUNK_DATA32 *)(import + offset); 478 | th = (IMAGE_THUNK_DATA32 *)(import + offset + th_size); 479 | offset += (2 * th_size); 480 | 481 | for (int j = 0; j < MAX_FUNCTIONS_IMPORT_PER_DLL; j++) { 482 | if (fn_names[i][j] == NULL) { 483 | break; 484 | } 485 | 486 | if (strcmp(fn_names[i][j], "GetStdHandle") == 0) { 487 | fn_GetStdHandle = k32_fn_array + (sizeof(u32) * j); 488 | } else if (strcmp(fn_names[i][j], "ReadConsoleA") == 0) { 489 | fn_ReadConsoleA = k32_fn_array + (sizeof(u32) * j); 490 | } else if (strcmp(fn_names[i][j], "WriteConsoleA") == 0) { 491 | fn_WriteConsoleA = k32_fn_array + (sizeof(u32) * j); 492 | } 493 | 494 | oth[j].AddressOfData = offset + CEED_IMPORT_SECTION_RVA; 495 | th[j].AddressOfData = oth[j].AddressOfData; 496 | 497 | iin = (pvoid)(import + offset); 498 | strcpy((char *)iin->Name, fn_names[i][j]); 499 | offset += (offsetof(IMAGE_IMPORT_BY_NAME, Name) + 500 | strlen(iin->Name) + 1); 501 | } 502 | } 503 | pe_set_exe_import_scn(ei, import, offset); 504 | } 505 | 506 | void 507 | pe_gen_exe_file(pvoid einfo) 508 | { 509 | pexe_info ei = einfo; 510 | FILE *exe_file; 511 | u32 hdr_size; 512 | u32 file_offset; 513 | 514 | if (func[0] == -1) 515 | { 516 | printf("Entry point function '_a' not found.\n"); 517 | exit(-1); 518 | } 519 | 520 | exe_file = fopen("a.exe", "wb+"); 521 | if (exe_file == NULL) 522 | { 523 | printf("Failed to create output file (a.exe).\n"); 524 | exit(errno); 525 | } 526 | 527 | pe_gen_data_section(ei); 528 | pe_write_fixed_hdrs(ei, exe_file); 529 | pe_write_optional_hdr(ei, exe_file); 530 | hdr_size = sizeof(dos_hdr) + sizeof(u32) + sizeof(IMAGE_FILE_HEADER) + 531 | 
sizeof(IMAGE_OPTIONAL_HEADER32) + 532 | (sizeof(IMAGE_SECTION_HEADER) * ei->scn_count); 533 | file_offset = round_up(hdr_size, CEED_FILE_ALIGN); 534 | for (int i = 0; i < ei->scn_count; i++) 535 | { 536 | psection_info si = ei->scn_list[i]; 537 | si->scn_hdr.PointerToRawData = file_offset; 538 | pe_write_section_hdr(si, exe_file); 539 | file_offset += si->scn_file_size; 540 | } 541 | file_offset = round_up(hdr_size, CEED_FILE_ALIGN); 542 | for (int i = 0; i < ei->scn_count; i++) 543 | { 544 | psection_info si = ei->scn_list[i]; 545 | pe_write_section_data(si, file_offset, exe_file); 546 | file_offset += si->scn_file_size; 547 | } 548 | 549 | fclose(exe_file); 550 | } 551 | 552 | void 553 | pe_emit_indirect_call(u32 fn_addr) 554 | { 555 | // call [fn_addr] 556 | emit8(0xff); 557 | emit8(0x15); 558 | emit32(fn_addr); 559 | } 560 | 561 | 562 | void 563 | pe_emit_get_std_handle(u8 handle_type, u32 save_location) 564 | { 565 | // push handle_type 566 | emit8(0x6a); 567 | emit8(handle_type); 568 | 569 | pe_emit_indirect_call(fn_GetStdHandle); 570 | 571 | // mov [data_section_va + save_location], eax 572 | emit8(0xa3); 573 | emit32(get_data_va(ei) + save_location); 574 | } 575 | 576 | #define STD_INPUT_HANDLE (-10) 577 | #define STD_OUTPUT_HANDLE (-11) 578 | 579 | void 580 | pe_emit_main_init() 581 | { 582 | // 583 | // We use this to invoke some code from main that stores handles to stdin 584 | // and stdout in global variable. 585 | // 586 | pe_emit_get_std_handle(STD_INPUT_HANDLE, CEED_STDIN_HANDLE_RVA); 587 | pe_emit_get_std_handle(STD_OUTPUT_HANDLE, CEED_STDOUT_HANDLE_RVA); 588 | 589 | } 590 | 591 | void 592 | pe_emit_main_exit() 593 | { 594 | // ret 595 | emit8(0xc3); 596 | } 597 | 598 | void 599 | pe_emit_write(u32 buf_addr, u32 buf_len) 600 | { 601 | // 602 | // Uses WriteConsole win32 API. 
603 | // 604 | 605 | // push 0 (lpReserved -> NULL) 606 | emit16(0x006a); 607 | 608 | // push temp location (lpNumberOfCharsWritten) 609 | emit8(0x68); 610 | emit32(get_data_va(ei) + CEED_TEMP_U32_1); 611 | 612 | // push buf_len (nNumberOfCharsToWrite) 613 | emit8(0x68); 614 | emit32(buf_len); 615 | 616 | // push buf_addr (lpBuffer) 617 | emit8(0x68); 618 | emit32(buf_addr); 619 | 620 | // push stdout handle from saved location 621 | emit16(0x35ff); 622 | emit32(get_data_va(ei) + CEED_STDOUT_HANDLE_RVA); 623 | 624 | // call [fn_addr] 625 | emit16(0x15ff); 626 | emit32(fn_WriteConsoleA); 627 | } 628 | 629 | void 630 | pe_emit_write_reg_input() 631 | { 632 | // 633 | // Uses WriteConsole win32 API. 634 | // 635 | 636 | // push 0 (lpReserved -> NULL) 637 | emit16(0x006a); 638 | 639 | // push temp location (lpNumberOfCharsWritten) 640 | emit8(0x68); 641 | emit32(get_data_va(ei) + CEED_TEMP_U32_1); 642 | 643 | // push edx (edx represents buffer length) 644 | emit8(0x52); 645 | 646 | // push ecx (ecx == buf_addr (lpBuffer)) 647 | emit8(0x51); 648 | 649 | // push stdout handle from saved location 650 | emit16(0x35ff); 651 | emit32(get_data_va(ei) + CEED_STDOUT_HANDLE_RVA); 652 | 653 | // call [fn_addr] 654 | emit16(0x15ff); 655 | emit32(fn_WriteConsoleA); 656 | } 657 | 658 | void 659 | pe_emit_read(u32 buf_addr, u32 buf_len) 660 | { 661 | // 662 | // Uses ReadConsole win32 API. 
663 | // 664 | 665 | // push 0 (pInputControl -> NULL) 666 | emit16(0x006a); 667 | 668 | // push temp location (lpNumberOfCharsRead) 669 | emit8(0x68); 670 | emit32(get_data_va(ei) + CEED_TEMP_U32_1); 671 | 672 | // push buf_len (nNumberOfCharsToRead) 673 | emit8(0x68); 674 | emit32(buf_len); 675 | 676 | // push buf_addr (lpBuffer) 677 | emit8(0x68); 678 | emit32(buf_addr); 679 | 680 | // push stdin handle from saved location 681 | emit16(0x35ff); 682 | emit32(get_data_va(ei) + CEED_STDIN_HANDLE_RVA); 683 | 684 | // call [fn_addr] 685 | emit16(0x15ff); 686 | emit32(fn_ReadConsoleA); 687 | } 688 | 689 | void 690 | pe_init() 691 | { 692 | ei = pe_alloc_exe_info(); 693 | // 694 | // We generate import section ahead of time, as the offsets in IAT (import 695 | // address table) are needed in code generation for console I/O. 696 | // 697 | pe_gen_import_section(ei); 698 | gen_exe_file = pe_gen_exe_file; 699 | set_exe_code_scn = pe_set_exe_code_scn; 700 | set_exe_rdata_scn = pe_set_exe_rdata_scn; 701 | get_code_va = pe_get_code_va; 702 | get_data_va = pe_get_data_va; 703 | get_rdata_va = pe_get_rdata_va; 704 | emit_main_exit = pe_emit_main_exit; 705 | emit_main_init = pe_emit_main_init; 706 | emit_write = pe_emit_write; 707 | emit_write_reg_input = pe_emit_write_reg_input; 708 | emit_read = pe_emit_read; 709 | } 710 | 711 | -------------------------------------------------------------------------------- /src/ceedrtl.c: -------------------------------------------------------------------------------- 1 | /*++ 2 | 3 | Copyright (c) 2017, Pankaj Garg 4 | All rights reserved. 5 | 6 | This software may be modified and distributed under the terms of the BSD 7 | license. See the LICENSE file for details. 
--*/
#include "ceed.h"

//
// Pre-assembled machine code for the integer-to-string runtime routine.
// The array is declared 0x80 bytes long; bytes after the initialiser are
// zero.
//
u8 itoa_code[0x80] =
{
    0x55, 0x89, 0xE5, 0x83, 0xEC, 0x04, 0x83, 0xC3, 0x0B, 0xC6, 0x03, 0x00, 0xB9, 0x0A, 0x00, 0x00, 0x00, 0xC7, 0x45, 0xFC, 0x00, 0x00, 0x00, 0x00, 0x31, 0xFF, 0x83, 0xF8, 0x00, 0x7D, 0x07, 0xF7, 0xD8, 0xBF, 0x01, 0x00, 0x00, 0x00, 0x31, 0xD2, 0xF7, 0xF9, 0x83, 0xC2, 0x30, 0x4B, 0x88, 0x13, 0xFF, 0x45, 0xFC, 0x83, 0xF8, 0x00, 0x75, 0xEE, 0x83, 0xFF, 0x01, 0x75, 0x07, 0x4B, 0xC6, 0x03, 0x2D, 0xFF, 0x45, 0xFC, 0x89, 0xD9, 0x8B, 0x55, 0xFC, 0x89, 0xEC, 0x5D, 0xC3
};

//
// Pre-assembled machine code for the string-to-integer runtime routine.
// The array is declared 0x80 bytes long; bytes after the initialiser are
// zero.
//
u8 atoi_code[0x80] =
{
    0x55, 0x89, 0xE5, 0x83, 0xEC, 0x04, 0x31, 0xFF, 0xB9, 0x0A, 0x00, 0x00, 0x00, 0x31, 0xC0, 0x31, 0xD2, 0xC7, 0x45, 0xFC, 0x00, 0x00, 0x00, 0x00, 0x31, 0xF6, 0x8A, 0x14, 0x3B, 0x80, 0xFA, 0x2D, 0x75, 0x06, 0xBE, 0x01, 0x00, 0x00, 0x00, 0x47, 0x8B, 0x45, 0xFC, 0xF7, 0xE1, 0x70, 0x36, 0x8A, 0x14, 0x3B, 0x80, 0xFA, 0x30, 0x7C, 0x2E, 0x80, 0xFA, 0x39, 0x7F, 0x29, 0x80, 0xEA, 0x30, 0x01, 0xD0, 0x89, 0x45, 0xFC, 0x47, 0x80, 0x3C, 0x3B, 0x00, 0x74, 0x0E, 0x80, 0x3C, 0x3B, 0x0A, 0x74, 0x08, 0x80, 0x3C, 0x3B, 0x0D, 0x74, 0x02, 0xEB, 0xCF, 0x8B, 0x45, 0xFC, 0x83, 0xFE, 0x01, 0x75, 0x06, 0xF7, 0xD8, 0xEB, 0x02, 0x31, 0xC0, 0x89, 0xEC, 0x5D, 0xC3
};

--------------------------------------------------------------------------------
/src/makefile:
--------------------------------------------------------------------------------
CC=gcc
CFLAGS=-w -std=c99
DEPS = ceed.h ceed.tab.h
OBJ = lex.yy.o ceed.tab.o ceedcmpl.o ceedelf.o ceedpe.o ceedrtl.o

.PHONY: all clean

# Keep 'all' as the first target so that a plain `make` builds the compiler
# instead of stopping after the generated lexer/parser sources.
all: ceed

lex.yy.c ceed.tab.c ceed.tab.h: ceed.l ceed.y
	bison -d ceed.y
	flex ceed.l

%.o: %.c $(DEPS)
	$(CC) -c -o $@ $< $(CFLAGS)

ceed: $(OBJ)
	$(CC) -o $@ $^ $(CFLAGS)

clean:
	rm -f *.o
	rm -f lex.yy.c ceed.tab.c ceed.tab.h
	rm -f ceed ceed.exe

--------------------------------------------------------------------------------
/tst/hello.e:
-------------------------------------------------------------------------------- 1 | _a { 2 | ws("Hello world!"); nl(); 3 | } 4 | -------------------------------------------------------------------------------- /tst/loop.e: -------------------------------------------------------------------------------- 1 | _b { 2 | A = A + 1; 3 | wi(A); nl(); 4 | _b(); 5 | } 6 | 7 | _a { 8 | _b(); 9 | } 10 | -------------------------------------------------------------------------------- /tst/math.e: -------------------------------------------------------------------------------- 1 | /* 2 | * A helper function to show the concept of functions and global variables. 3 | * It prints value of global variable R, followed by a new line. 4 | */ 5 | _b { 6 | wi(R); nl(); 7 | } 8 | 9 | /* 10 | * This is the entry point, equivalent to c main function. 11 | * It does simple calculation and stores result in global variable: R 12 | */ 13 | _a { 14 | nl(); 15 | ws("-------------------------"); nl(); 16 | ws("| Choice Operation |"); nl(); 17 | ws("-------------------------"); nl(); 18 | ws("| 1 Addition |"); nl(); 19 | ws("| 2 Substraction |"); nl(); 20 | ws("| 0 Exit |"); nl(); 21 | ws("-------------------------"); nl(); 22 | ws("Please enter your choice: "); 23 | x = ri(); 24 | if (x == 1) { 25 | ws("Enter first number: "); 26 | a = ri(); 27 | ws("Enter second number: "); 28 | b = ri(); 29 | R = a + b; 30 | ws("Result: "); 31 | _b(); 32 | } el { 33 | if (x == 2) { 34 | ws("Enter first number: "); 35 | a = ri(); 36 | ws("Enter second number: "); 37 | b = ri(); 38 | ws("Result: "); 39 | wi(a - b); nl(); 40 | } 41 | } 42 | } 43 | --------------------------------------------------------------------------------