├── .gitignore ├── LICENSE ├── Makefile ├── codegen.c ├── codegen.h ├── lexer.c ├── lexer.h ├── main.c ├── parser.c ├── parser.h ├── test.c ├── trie.c └── trie.h /.gitignore: -------------------------------------------------------------------------------- 1 | **/*.o 2 | **/*.out 3 | **/*.core 4 | **/*~ 5 | cc 6 | out.S 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2025, Lorenzo Torres 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, 6 | are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | * Neither the name of {{ project }} nor the names of its contributors 14 | may be used to endorse or promote products derived from this software 15 | without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 21 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 22 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 23 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 24 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 25 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 26 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC:=cc 2 | CFLAGS:=-Wall -Wextra -std=c11 -pedantic -g -I./ 3 | LIBS:= 4 | BIN=cc 5 | 6 | SRC:=main.c lexer.c trie.c parser.c codegen.c 7 | OBJ:=${SRC:.c=.o} 8 | 9 | all: ${OBJ} 10 | ${CC} ${LIBS} ${OBJ} -o ${BIN} 11 | 12 | %.o: %.c 13 | ${CC} ${CFLAGS} -c $< -o $@ 14 | 15 | .PHONY: clean 16 | clean: 17 | @rm -rfv ${OBJ} ${BIN} 18 | -------------------------------------------------------------------------------- /codegen.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | extern struct literal_pair *literals_head; 8 | 9 | static const char *call_regs[] = { 10 | "rdi", "rsi", "rdx", "rcx", "r8", "r9" 11 | }; 12 | 13 | static struct scope *scope_stack = NULL; 14 | static int main_function = 0; 15 | 16 | struct trie *variables = NULL; 17 | static size_t str_count = 0; 18 | static size_t base_offset = 0; 19 | 20 | void push_scope(size_t base, size_t pointer) 21 | { 22 | struct scope *scope = (struct scope *) malloc(sizeof(struct scope)); 23 | scope->stack_base = base; 24 | scope->stack_pointer = pointer; 25 | 26 | scope->prev = scope_stack; 27 | scope_stack->next = scope; 28 | scope_stack = scope; 29 | } 30 | 31 | struct scope *pop_scope() 32 | { 33 
| struct scope *tmp = scope_stack; 34 | scope_stack = scope_stack->prev; 35 | tmp->prev = NULL; 36 | return tmp; 37 | } 38 | 39 | void gen_literals(FILE *f) 40 | { 41 | fprintf(f, ".data\n"); 42 | while (literals_head != NULL) { 43 | fprintf(f, "str%d: .asciz \"%s\"\n", literals_head->label, literals_head->literal); 44 | 45 | literals_head = literals_head->next; 46 | } 47 | } 48 | 49 | void gen_block(FILE *f, struct block_member *block) 50 | { 51 | while (block != NULL) { 52 | gen_statement(f, block->value); 53 | 54 | block = block->next; 55 | } 56 | } 57 | 58 | void gen_assignment(FILE *f, ast_statement_t *assignment) 59 | { 60 | gen_mov(f, assignment->statement.var_assign.value, "r10"); 61 | fprintf(f, "mov %%r10, -%x(%%rbp)\n", trie_get(variables, assignment->statement.var_assign.identifier)); 62 | } 63 | 64 | void gen_def(FILE *f, ast_statement_t *statement) 65 | { 66 | trie_insert(variables, statement->statement.var_def.identifier, base_offset); 67 | base_offset += ast_type_size(statement->statement.var_def.ty); 68 | 69 | if (statement->statement.var_def.assignment != NULL) { 70 | gen_assignment(f, statement->statement.var_def.assignment); 71 | } 72 | } 73 | 74 | void gen_mov(FILE *f, ast_node_t *expr, char *reg) 75 | { 76 | if (expr == NULL) return; 77 | 78 | if (expr->ty == INTEGER) { 79 | fprintf(f, "mov $%d, %%%s\n", expr->expr.integer, reg); 80 | } else if (expr->ty == STRING_LIT) { 81 | fprintf(f, "mov $str%d, %%%s\n", expr->expr.string, reg); 82 | } else if (expr->ty == ID) { 83 | fprintf(f, "mov -0x%x(%%rbp), %%%s\n", trie_get(variables, expr->expr.identifier), reg); 84 | } else if (expr->ty == CALL) { 85 | for (int i=0; i < expr->expr.call.arg_count; i++) { 86 | gen_mov(f, expr->expr.call.args[i], call_regs[i]); 87 | } 88 | 89 | fprintf(f, "call %s\n", expr->expr.call.identifier); 90 | fprintf(f, "mov %%rax, %%%s\n", reg); 91 | } else if (expr->ty == BINARY) { 92 | gen_mov(f, expr->expr.binary.left, "r15"); 93 | gen_mov(f, expr->expr.binary.right, 
reg); 94 | switch (expr->expr.binary.op) { 95 | case OP_PLUS: 96 | fprintf(f, "add %%r15, %%%s\n", reg); 97 | break; 98 | case OP_MINUS: 99 | fprintf(f, "sub %%r15, %%%s\n", reg); 100 | break; 101 | case OP_STAR: 102 | fprintf(f, "mul %%r15, %%%s\n", reg); 103 | break; 104 | case OP_SLASH: 105 | fprintf(f, "div %%r15, %%%s\n", reg); 106 | break; 107 | } 108 | } 109 | } 110 | 111 | void gen_expr(FILE *f, ast_node_t *expr) { 112 | if (expr->ty == CALL) { 113 | for (int i=0; i < expr->expr.call.arg_count; i++) { 114 | gen_mov(f, expr->expr.call.args[i], call_regs[i]); 115 | } 116 | 117 | fprintf(f, "call %s\n", expr->expr.call.identifier); 118 | } 119 | } 120 | 121 | void gen_function(FILE *f, ast_statement_t *statement) 122 | { 123 | if (statement->statement.function.block == NULL) { 124 | return; 125 | } 126 | 127 | variables = (struct trie *) malloc(sizeof(struct trie)); 128 | memset(variables, 0x0, sizeof(struct trie)); 129 | base_offset = 0; 130 | 131 | if (strcmp(statement->statement.function.identifier, "main") == 0) { 132 | main_function = 1; 133 | } 134 | 135 | 136 | 137 | fprintf(f, "%s:\n", statement->statement.function.identifier); 138 | fprintf(f, "mov %%rsp, %%rbp\n"); 139 | if (statement->statement.function.block != NULL) { 140 | fprintf(f, "sub $0x%x, %%rsp\n", statement->statement.function.block->stack_size); 141 | 142 | for (int i=0; i < statement->statement.function.arg_count; i++) { 143 | struct arg a = statement->statement.function.args[i]; 144 | trie_insert(variables, a.identifier, base_offset); 145 | fprintf(f, "mov %%%s, -%x(%%rbp)\n", call_regs[i], base_offset); 146 | base_offset += ast_type_size(a.ty); 147 | } 148 | 149 | gen_block(f, statement->statement.function.block); 150 | } 151 | 152 | free(variables); 153 | if (statement->statement.function.ty == VOID_T) { 154 | fprintf(f, "leave\nret\n"); 155 | } 156 | } 157 | 158 | void gen_return(FILE *f, ast_statement_t *statement) 159 | { 160 | if (main_function) { 161 | main_function = 0; 162 | 
fprintf(f, "leave\n"); 163 | gen_mov(f, statement->statement.ret.value, "rdi"); 164 | fprintf(f, "mov $0x3c, %%rax\nsyscall\n"); 165 | } else { 166 | gen_mov(f, statement->statement.ret.value, "rax"); 167 | fprintf(f, "leave\nret\n"); 168 | } 169 | } 170 | 171 | void gen_statement(FILE *f, ast_statement_t *statement) 172 | { 173 | if (statement != NULL) { 174 | ast_type_t ty = statement->t; 175 | if (ty == FUNCTION) { 176 | gen_function(f, statement); 177 | } else if (ty == VAR_DEF) { 178 | gen_def(f, statement); 179 | } else if (ty == VAR_ASSIGN) { 180 | gen_assignment(f, statement); 181 | } else if (ty == EXPRESSION) { 182 | gen_expr(f, statement->statement.expression); 183 | } else if (ty == RETURN_STATEMENT) { 184 | gen_return(f, statement); 185 | } 186 | } 187 | } 188 | 189 | void gen_asm(FILE *f, struct statement_list *statements) 190 | { 191 | gen_literals(f); 192 | 193 | scope_stack = (struct scope *) malloc(sizeof(struct scope)); 194 | 195 | fprintf(f, ".text\n.global main\n"); 196 | 197 | while (statements != NULL) { 198 | gen_statement(f, statements->statement); 199 | 200 | statements = statements->next; 201 | } 202 | 203 | while (scope_stack) { 204 | struct scope *tmp = scope_stack->prev; 205 | scope_stack = scope_stack->prev; 206 | free(tmp); 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /codegen.h: -------------------------------------------------------------------------------- 1 | #ifndef CODEGEN_H 2 | #define CODEGEN_H 3 | 4 | #include 5 | #include 6 | 7 | struct scope { 8 | size_t stack_base; 9 | size_t stack_pointer; 10 | 11 | struct scope *next; 12 | struct scope *prev; 13 | }; 14 | 15 | void gen_def(FILE *f, ast_statement_t *statement); 16 | void gen_assignment(FILE *f, ast_statement_t *assignment); 17 | void gen_mov(FILE *f, ast_node_t *expr, char *reg); 18 | void gen_function(FILE *f, ast_statement_t *statement); 19 | void gen_block(FILE *f, struct block_member *block); 20 | void 
gen_return(FILE *f, ast_statement_t *statement); 21 | void gen_statement(FILE *f, ast_statement_t *statement); 22 | void gen_asm(FILE *f, struct statement_list* statements); 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /lexer.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | struct trie keywords = {0}; 11 | token_t *list; 12 | token_t *head; 13 | 14 | void lexer_push(token_type_t ty, char *lexeme) 15 | { 16 | list->next = (token_t *) malloc(sizeof(token_t)); 17 | list->next->ty = ty; 18 | list->next->lexeme = lexeme; 19 | 20 | list = list->next; 21 | } 22 | 23 | token_t *lexer_parse(char *source) 24 | { 25 | trie_insert(&keywords, "typedef", TYPEDEF); 26 | trie_insert(&keywords, "struct", STRUCT); 27 | trie_insert(&keywords, "for", FOR); 28 | trie_insert(&keywords, "if", IF); 29 | trie_insert(&keywords, "while", WHILE); 30 | trie_insert(&keywords, "else", ELSE); 31 | trie_insert(&keywords, "continue", CONTINUE); 32 | trie_insert(&keywords, "case", CASE); 33 | trie_insert(&keywords, "switch", SWITCH); 34 | trie_insert(&keywords, "break", BREAK); 35 | trie_insert(&keywords, "goto", GOTO); 36 | trie_insert(&keywords, "static", STATIC); 37 | trie_insert(&keywords, "const", CONST); 38 | trie_insert(&keywords, "long", LONG); 39 | trie_insert(&keywords, "short", SHORT); 40 | trie_insert(&keywords, "nsigned", UNSIGNED); 41 | trie_insert(&keywords, "char", CHAR); 42 | trie_insert(&keywords, "int", INT); 43 | trie_insert(&keywords, "float", FLOAT); 44 | trie_insert(&keywords, "double", DOUBLE); 45 | trie_insert(&keywords, "void", VOID); 46 | trie_insert(&keywords, "return", RETURN); 47 | trie_insert(&keywords, "asm", ASM); 48 | list = (token_t *) malloc(sizeof(token_t)); 49 | head = list; 50 | bzero(list, sizeof(token_t)); 51 | 52 | for (size_t i=0; i < strlen(source); i++) 
{ 53 | char current = source[i]; 54 | char next; 55 | 56 | if (isspace(current)) { 57 | continue; 58 | } 59 | 60 | switch (current) { 61 | case '+': 62 | next = source[i+1]; 63 | if (source[i+1] == '+') { 64 | lexer_push(PLUS_PLUS, "++"); 65 | i++; 66 | } else if (source[i+1] == '=') { 67 | lexer_push(PLUS_EQ, "+="); 68 | i++; 69 | } else { 70 | lexer_push(PLUS, "+"); 71 | } 72 | break; 73 | case '-': 74 | next = source[i+1]; 75 | if (source[i+1] == '-') { 76 | lexer_push(MINUS_MINUS, "--"); 77 | i++; 78 | } else if (source[i+1] == '=') { 79 | lexer_push(MINUS_EQ, "-="); 80 | i++; 81 | } else if (source[i+1] == '>') { 82 | lexer_push(ARROW, "->"); 83 | i++; 84 | } else { 85 | lexer_push(MINUS, "-"); 86 | } 87 | break; 88 | case '*': 89 | next = source[i+1]; 90 | if (source[i+1] == '=') { 91 | lexer_push(STAR_EQ, "*="); 92 | i++; 93 | } else { 94 | lexer_push(STAR, "*"); 95 | } 96 | break; 97 | case '/': 98 | next = source[i+1]; 99 | if (source[i+1] == '=') { 100 | lexer_push(SLASH_EQ, "*="); 101 | i++; 102 | } else { 103 | lexer_push(SLASH, "*"); 104 | } 105 | break; 106 | case '=': 107 | next = source[i+1]; 108 | if (source[i+1] == '=') { 109 | lexer_push(EQUAL, "=="); 110 | i++; 111 | } else { 112 | lexer_push(ASSIGN, "="); 113 | } 114 | break; 115 | case '>': 116 | next = source[i+1]; 117 | if (source[i+1] == '=') { 118 | lexer_push(GREATER_EQ, ">="); 119 | i++; 120 | } else { 121 | lexer_push(GREATER, ">"); 122 | } 123 | break; 124 | case '<': 125 | next = source[i+1]; 126 | if (source[i+1] == '=') { 127 | lexer_push(LESS_EQ, "<="); 128 | i++; 129 | } else { 130 | lexer_push(LESS, "<"); 131 | } 132 | break; 133 | case '!': 134 | next = source[i+1]; 135 | if (source[i+1] == '=') { 136 | lexer_push(NOT_EQ, "!="); 137 | i++; 138 | } else { 139 | lexer_push(NOT, "!"); 140 | } 141 | break; 142 | case '|': 143 | next = source[i+1]; 144 | if (source[i+1] == '|') { 145 | lexer_push(LOG_OR, "||"); 146 | i++; 147 | } else if (source[i+1] == '=') { 148 | lexer_push(OR_EQ, 
"|="); 149 | i++; 150 | } else { 151 | lexer_push(OR, "|"); 152 | } 153 | break; 154 | case '&': 155 | next = source[i+1]; 156 | if (source[i+1] == '&') { 157 | lexer_push(LOG_AND, "&&"); 158 | i++; 159 | } else if (source[i+1] == '=') { 160 | lexer_push(AND_EQ, "&="); 161 | i++; 162 | } else { 163 | lexer_push(AND, "&"); 164 | } 165 | break; 166 | case '^': 167 | next = source[i+1]; 168 | if (source[i+1] == '=') { 169 | lexer_push(XOR_EQ, "^="); 170 | i++; 171 | } else { 172 | lexer_push(XOR, "^"); 173 | } 174 | break; 175 | case '(': 176 | lexer_push(L_PAREN, "("); 177 | break; 178 | case ')': 179 | lexer_push(R_PAREN, ")"); 180 | break; 181 | case '[': 182 | lexer_push(L_SQUARE, "["); 183 | break; 184 | case ']': 185 | lexer_push(R_SQUARE, "]"); 186 | break; 187 | case '{': 188 | lexer_push(L_CURLY, "{"); 189 | break; 190 | case '}': 191 | lexer_push(R_CURLY, "}"); 192 | break; 193 | case '.': 194 | lexer_push(DOT, "."); 195 | break; 196 | case ',': 197 | lexer_push(COMMA, ","); 198 | break; 199 | case ':': 200 | lexer_push(COLON, ":"); 201 | break; 202 | case ';': 203 | lexer_push(SEMICOLON, ";"); 204 | break; 205 | 206 | } 207 | 208 | if (isdigit(current)) { 209 | char *ptr = &source[i]; 210 | size_t len = i; 211 | i++; 212 | current = source[i]; 213 | 214 | while (isdigit(current)) { 215 | i++; 216 | current = source[i]; 217 | } 218 | 219 | len = i - len; 220 | char *lexeme = malloc(sizeof(char) * (len+1)); 221 | strncpy(lexeme, ptr, len); 222 | lexeme[len] = '\0'; 223 | lexer_push(NUMBER, lexeme); 224 | i--; 225 | } 226 | 227 | if (current == '"') { 228 | char *ptr = &source[i+1]; 229 | size_t len = i+1; 230 | i++; 231 | current = source[i]; 232 | 233 | while (current != '"') { 234 | i++; 235 | current = source[i]; 236 | } 237 | 238 | len = i - len; 239 | char *lexeme = malloc(sizeof(char) * (len+1)); 240 | strncpy(lexeme, ptr, len); 241 | lexeme[len] = '\0'; 242 | 243 | lexer_push(STRING, lexeme); 244 | } 245 | 246 | if (current == '#') { 247 | char *ptr = 
&source[i+1]; 248 | size_t len = i+1; 249 | i++; 250 | current = source[i]; 251 | 252 | while (current != '\n') { 253 | i++; 254 | current = source[i]; 255 | } 256 | 257 | len = i - len; 258 | char *lexeme = malloc(sizeof(char) * (len+1)); 259 | strncpy(lexeme, ptr, len); 260 | lexeme[len] = '\0'; 261 | 262 | lexer_push(MACRO, lexeme); 263 | i--; 264 | } 265 | 266 | if (isalpha(current)) { 267 | char *ptr = &source[i]; 268 | size_t len = i; 269 | i++; 270 | current = source[i]; 271 | 272 | while (isalnum(current)) { 273 | i++; 274 | current = source[i]; 275 | } 276 | 277 | len = i - len; 278 | char *lexeme = malloc(sizeof(char) * (len+1)); 279 | strncpy(lexeme, ptr, len); 280 | lexeme[len] = '\0'; 281 | token_type_t ty = trie_get(&keywords, lexeme); 282 | if (ty == -1) ty = IDENTIFIER; 283 | 284 | lexer_push(ty, lexeme); 285 | i--; 286 | } 287 | } 288 | 289 | return head; 290 | } 291 | 292 | -------------------------------------------------------------------------------- /lexer.h: -------------------------------------------------------------------------------- 1 | #ifndef LEXER_H 2 | #define LEXER_H 3 | 4 | typedef enum token_type { 5 | // literals 6 | NUMBER, 7 | IDENTIFIER, 8 | STRING, 9 | 10 | // arithmetic operators 11 | PLUS, 12 | MINUS, 13 | STAR, 14 | SLASH, 15 | MOD, 16 | 17 | PLUS_EQ, 18 | MINUS_EQ, 19 | STAR_EQ, 20 | SLASH_EQ, 21 | PLUS_PLUS, 22 | MINUS_MINUS, 23 | ASSIGN, 24 | 25 | // logic operators 26 | GREATER, 27 | LESS, 28 | GREATER_EQ, 29 | LESS_EQ, 30 | NOT_EQ, 31 | EQUAL, 32 | LOG_OR, 33 | LOG_AND, 34 | 35 | // bit operators 36 | NOT, 37 | OR, 38 | OR_EQ, 39 | AND, 40 | AND_EQ, 41 | XOR, 42 | XOR_EQ, 43 | 44 | // keywords 45 | TYPEDEF, 46 | STRUCT, 47 | FOR, 48 | IF, 49 | WHILE, 50 | ELSE, 51 | CONTINUE, 52 | CASE, 53 | SWITCH, 54 | BREAK, 55 | GOTO, 56 | STATIC, 57 | CONST, 58 | LONG, 59 | SHORT, 60 | UNSIGNED, 61 | CHAR, 62 | INT, 63 | FLOAT, 64 | DOUBLE, 65 | VOID, 66 | RETURN, 67 | ASM, 68 | 69 | // special 70 | L_PAREN, 71 | R_PAREN, 72 | 
/*
 * Compiler driver: read the file named by argv[1], lex and parse it, and
 * write the generated assembly to "out.S". Returns 0 on success and 1 on
 * any usage or I/O error.
 */
int main(int argc, char **argv)
{
    if (argc < 2) {
        printf("Usage: cc [filename]\n");
        exit(1);
    }

    FILE *source = fopen(argv[1], "r");
    if (source == NULL) {
        perror(argv[1]);
        return 1;
    }

    long size = -1;
    if (fseek(source, 0, SEEK_END) == 0) {
        size = ftell(source);
    }
    if (size < 0 || fseek(source, 0, SEEK_SET) != 0) {
        perror(argv[1]);
        fclose(source);
        return 1;
    }

    char *buffer = malloc((size_t) size + 1);
    if (buffer == NULL) {
        fprintf(stderr, "cc: out of memory\n");
        fclose(source);
        return 1;
    }

    // Terminate after what was actually read; it may be less than `size`.
    size_t got = fread(buffer, 1, (size_t) size, source);
    if (ferror(source)) {
        perror(argv[1]);
        free(buffer);
        fclose(source);
        return 1;
    }
    buffer[got] = '\0';
    fclose(source);

    // The token list is handed to the parser through the lexer's globals,
    // so the return value is not needed here.
    lexer_parse(buffer);

    struct statement_list *ast = ast_parse();

    FILE *f = fopen("out.S", "w");
    if (f == NULL) {
        perror("out.S");
        free(buffer);
        return 1;
    }
    gen_asm(f, ast);

    // fclose flushes; a failure here means the output is incomplete.
    if (fclose(f) != 0) {
        perror("out.S");
        free(buffer);
        return 1;
    }

    free(buffer);
    return 0;
}
return list->next->ty == ty; 35 | } 36 | 37 | uint8_t check_token(token_type_t ty) 38 | { 39 | if (!list) return 0; 40 | return list->ty == ty; 41 | } 42 | 43 | struct statement_list *ast_parse(void) 44 | { 45 | list = head; 46 | list = list->next; 47 | 48 | struct statement_list *statements = (struct statement_list *) malloc(sizeof(struct statement_list)); 49 | struct statement_list *statements_head = statements; 50 | 51 | while (list != NULL) { 52 | statements->statement = ast_statement(); 53 | statements->next = (struct statement_list *) malloc(sizeof(struct statement_list)); 54 | statements = statements->next; 55 | } 56 | 57 | return statements_head; 58 | } 59 | 60 | ast_statement_t *ast_function(void) 61 | { 62 | ast_statement_t *fn = malloc(sizeof(ast_statement_t)); 63 | fn->t = FUNCTION; 64 | fn->statement.function.ty = ast_type(); 65 | 66 | if (!check_token(IDENTIFIER)) { 67 | // TODO: error 68 | } 69 | fn->statement.function.identifier = list->lexeme; 70 | next(); 71 | check_consume(L_PAREN); 72 | 73 | size_t stack_size = 0; 74 | 75 | token_t *tmp = list; 76 | expr_type_t arg_type = ast_type(); 77 | if ((arg_type & TYPE_MASK) == VOID_T) { 78 | if (arg_type & TYPE_POINTER) { 79 | goto check_args; 80 | } else { 81 | check_consume(R_PAREN); 82 | fn->statement.function.arg_count = 0; 83 | } 84 | } else if (!check_consume(R_PAREN)) { 85 | check_args: 86 | list = tmp; 87 | size_t arg_count = 0; 88 | while (!check_token(R_PAREN)) { 89 | if (check_consume(COMMA)) { 90 | arg_count += 1; 91 | } else { 92 | next(); 93 | } 94 | } 95 | arg_count += 1; 96 | fn->statement.function.arg_count = arg_count; 97 | fn->statement.function.args = (struct arg *) malloc(sizeof(struct arg) * arg_count); 98 | struct arg *args = fn->statement.function.args; 99 | 100 | list = tmp; 101 | for (size_t i=0; !check_consume(R_PAREN); i++) { 102 | args[i].ty = ast_type(); 103 | stack_size += ast_type_size(args[i].ty); 104 | if (!check_token(IDENTIFIER)) { 105 | args[i].identifier = NULL; 106 
| } else { 107 | args[i].identifier = list->lexeme; 108 | next(); 109 | } 110 | 111 | check_consume(COMMA); 112 | } 113 | } else { 114 | fn->statement.function.arg_count = 0; 115 | } 116 | 117 | if (check_consume(SEMICOLON)) { 118 | fn->statement.function.block = NULL; 119 | } else if (check_token(L_CURLY)) { 120 | fn->statement.function.block = ast_block(); 121 | fn->statement.function.block->stack_size += stack_size; 122 | } else if (check_consume(SEMICOLON)) { 123 | fn->statement.function.block = NULL; 124 | } else { 125 | // TODO: error 126 | } 127 | 128 | trie_insert(&functions, fn->statement.function.identifier, fn->statement.function.ty); 129 | 130 | return fn; 131 | } 132 | 133 | // TODO: implement this 134 | /* 135 | ast_statement_t ast_asm(void) 136 | { 137 | if (!check_consume(L_PAREN)) { 138 | // TODO: error 139 | } 140 | 141 | ast_statement_t *inline_asm = (ast_statement_t *) malloc(sizeof(ast_statement_t)); 142 | inline_asm->t = INLINE_ASM; 143 | if (!check_token(STRING)) { 144 | // TODO: error 145 | } 146 | inline_asm->statement.asm.source = list->lexeme; 147 | next(); 148 | 149 | if (!check_consume(COLON)) { 150 | // TODO: error 151 | } 152 | 153 | size_t value_count = 0; 154 | token_t *tmp = list; 155 | while (!check_token(R_PAREN)) { 156 | if (check_consume(COMMA)) { 157 | value_count += 1; 158 | } else { 159 | next(); 160 | } 161 | } 162 | 163 | list = tmp; 164 | arg_count += 1; 165 | inline_asm->statement.asm.value_count = arg_count; 166 | inline_asm->statement.asm.values = (ast_node_t **) malloc(sizeof(ast_node_t *) * value_count); 167 | ast_node_t **values = inline_asm->statement.asm.values; 168 | 169 | for (size_t i=0; !check_consume(R_PAREN); i++) { 170 | values[i] = expression(); 171 | 172 | check_consume(COMMA); 173 | } 174 | 175 | if (!check_consume(R_PAREN)) { 176 | // TODO: error 177 | } 178 | 179 | return inline_asm; 180 | } */ 181 | 182 | struct block_member *ast_block(void) 183 | { 184 | struct block_member *block = NULL; 185 | 
struct block_member *head = block; 186 | size_t stack_size = 0; 187 | if (!check_consume(L_CURLY)) { 188 | // TODO: error 189 | } 190 | 191 | if (check_consume(R_CURLY)) { 192 | return NULL; 193 | } 194 | 195 | while (!check_consume(R_CURLY)) { 196 | ast_statement_t *statement = ast_statement(); 197 | if (statement->t == VAR_DEF) { 198 | stack_size += ast_type_size(statement->statement.var_def.ty); 199 | } 200 | if (block == NULL) { 201 | block = malloc(sizeof(struct block_member)); 202 | } 203 | 204 | if (head == NULL) { 205 | head = block; 206 | } 207 | 208 | block->value = statement; 209 | 210 | block->next = malloc(sizeof(struct block_member)); 211 | 212 | block = block->next; 213 | block->value = NULL; 214 | } 215 | 216 | head->stack_size = stack_size; 217 | return head; 218 | } 219 | 220 | ast_node_t *ast_call(void) 221 | { 222 | ast_node_t *call = malloc(sizeof(ast_node_t)); 223 | call->ty = CALL; 224 | if (!check_token(IDENTIFIER)) { 225 | // TODO: error 226 | } 227 | 228 | call->expr.call.identifier = list->lexeme; 229 | next(); 230 | 231 | size_t arg_count = 0; 232 | 233 | if (!check_consume(L_PAREN)) { 234 | // TODO: error 235 | } 236 | 237 | token_t *tmp = list; 238 | while (!check_token(R_PAREN)) { 239 | if (check_consume(COMMA)) { 240 | arg_count += 1; 241 | } else { 242 | next(); 243 | } 244 | } 245 | 246 | list = tmp; 247 | arg_count += 1; 248 | call->expr.call.arg_count = arg_count; 249 | call->expr.call.args = (ast_node_t **) malloc(sizeof(ast_node_t *) * arg_count); 250 | ast_node_t **args = call->expr.call.args; 251 | 252 | for (size_t i=0; !check_consume(R_PAREN); i++) { 253 | args[i] = expression(); 254 | 255 | check_consume(COMMA); 256 | } 257 | 258 | if (!check_consume(SEMICOLON)) { 259 | // TODO: error 260 | } 261 | 262 | call->expr_ty = trie_get(&functions, call->expr.call.identifier); 263 | 264 | return call; 265 | } 266 | 267 | ast_statement_t *ast_variable(void) 268 | { 269 | ast_statement_t *var = (ast_statement_t *) 
malloc(sizeof(ast_statement_t)); 270 | 271 | var->t = VAR_DEF; 272 | var->statement.var_def.ty = ast_type(); 273 | if (!check_token(IDENTIFIER)) { 274 | // TODO: error 275 | } 276 | 277 | var->statement.var_def.identifier = list->lexeme; 278 | next(); 279 | 280 | if (check_consume(ASSIGN)) { 281 | ast_statement_t *assignment = (ast_statement_t *) malloc(sizeof(ast_statement_t)); 282 | assignment->t = VAR_ASSIGN; 283 | assignment->statement.var_assign.identifier = var->statement.var_def.identifier; 284 | assignment->statement.var_assign.value = expression(); 285 | var->statement.var_def.assignment = assignment; 286 | } 287 | 288 | if (!check_consume(SEMICOLON)) { 289 | // TODO: error 290 | } 291 | 292 | return var; 293 | } 294 | 295 | 296 | ast_statement_t *ast_return(void) 297 | { 298 | ast_statement_t *value = (ast_statement_t *) malloc(sizeof(ast_statement_t)); 299 | value->t = RETURN_STATEMENT; 300 | value->statement.ret.value = expression(); 301 | if (!check_consume(SEMICOLON)) { 302 | // TODO: error 303 | } 304 | return value; 305 | } 306 | 307 | ast_statement_t *ast_statement(void) 308 | { 309 | token_t *tmp = list; 310 | if (check_consume(RETURN)) { 311 | return ast_return(); 312 | } else { 313 | switch (list->ty) { 314 | case LONG: 315 | case SHORT: 316 | case UNSIGNED: 317 | case CHAR: 318 | case INT: 319 | case FLOAT: 320 | case DOUBLE: 321 | case VOID: { 322 | tmp = list; 323 | ast_type(); 324 | check_consume(IDENTIFIER); 325 | if (check_token(ASSIGN)) { 326 | list = tmp; 327 | return ast_variable(); 328 | } else if (check_token(L_PAREN)) { 329 | list = tmp; 330 | return ast_function(); 331 | } 332 | break; 333 | } 334 | default: 335 | break; 336 | } 337 | 338 | ast_statement_t *expr = (ast_statement_t *) malloc(sizeof(ast_statement_t)); 339 | expr->t = EXPRESSION; 340 | expr->statement.expression = expression(); 341 | if (!check_consume(SEMICOLON)) { 342 | // TODO: error 343 | } 344 | return expr; 345 | } 346 | } 347 | 348 | ast_node_t *expression(void) 
349 | { 350 | ast_node_t *t = term(); 351 | 352 | while (check_token(PLUS) || check_token(MINUS)) { 353 | ast_node_t *b = malloc(sizeof(ast_node_t)); 354 | b->ty = BINARY; 355 | b->expr.binary.op = check_token(PLUS) ? OP_PLUS : OP_MINUS; 356 | b->expr.binary.left = t; 357 | next(); 358 | b->expr.binary.right = term(); 359 | 360 | t = b; 361 | } 362 | 363 | return t; 364 | } 365 | 366 | ast_node_t *term(void) 367 | { 368 | ast_node_t *t = factor(); 369 | 370 | while (check_token(STAR) || check_token(SLASH)) { 371 | ast_node_t *b = malloc(sizeof(ast_node_t)); 372 | b->ty = BINARY; 373 | b->expr.binary.op = check_token(STAR) ? OP_STAR : OP_SLASH; 374 | b->expr.binary.left = t; 375 | next(); 376 | b->expr.binary.right = factor(); 377 | 378 | t = b; 379 | } 380 | 381 | return t; 382 | } 383 | 384 | ast_node_t *factor(void) 385 | { 386 | token_t *tmp = list; 387 | if (check_token(NUMBER)) { 388 | ast_node_t *node = malloc(sizeof(ast_node_t)); 389 | node->expr.integer = atoi(list->lexeme); 390 | node->ty = INTEGER; 391 | next(); 392 | 393 | return node; 394 | } else if (check_consume(IDENTIFIER)) { 395 | if (check_token(L_PAREN)) { 396 | list = tmp; 397 | return ast_call(); 398 | } 399 | list = tmp; 400 | 401 | ast_node_t *node = malloc(sizeof(ast_node_t)); 402 | node->expr.identifier = list->lexeme; 403 | node->ty = ID; 404 | next(); 405 | 406 | return node; 407 | } else if (check_token(STRING)) { 408 | struct literal_pair *literal = malloc(sizeof(struct literal_pair)); 409 | 410 | 411 | 412 | literal->literal = list->lexeme; 413 | literal->label = str_count; 414 | if (literals_head != NULL) { 415 | literal->next = literals_head; 416 | } 417 | literals_head = literal; 418 | 419 | ast_node_t *node = malloc(sizeof(ast_node_t)); 420 | node->expr.string = str_count; 421 | node->ty = STRING_LIT; 422 | next(); 423 | str_count++; 424 | 425 | return node; 426 | } 427 | 428 | return NULL; 429 | } 430 | 431 | 432 | expr_type_t ast_type(void) 433 | { 434 | expr_type_t res; 435 | 
436 | if (check_token(STRUCT)) { 437 | char *s_name = next()->lexeme; 438 | char *s_ty = aligned_alloc(0x1000, strlen(s_name)+1); 439 | strcpy(s_ty, s_name); 440 | next(); 441 | res = ((size_t)s_ty) | (STRUC & 0xfff); 442 | } else if (check_consume(LONG)) { 443 | check_consume(INT); 444 | res = INT64; 445 | } else if (check_consume(SHORT)) { 446 | check_consume(INT); 447 | res = INT16; 448 | } else if (check_consume(UNSIGNED)) { 449 | expr_type_t t = ast_type(); 450 | switch (t) { 451 | case INT8: 452 | res = UINT8; 453 | break; 454 | case INT16: 455 | res = UINT16; 456 | break; 457 | case INT32: 458 | res = UINT32; 459 | break; 460 | case INT64: 461 | res = UINT64; 462 | break; 463 | default: 464 | // TODO: error 465 | break; 466 | } 467 | } else if (check_consume(CHAR)) { 468 | res = INT8; 469 | } else if (check_consume(INT)) { 470 | res = INT32; 471 | } else if (check_consume(VOID)) { 472 | res = VOID_T; 473 | } 474 | 475 | if (check_consume(STAR)) { 476 | res |= TYPE_POINTER; 477 | } 478 | 479 | return res; 480 | } 481 | 482 | size_t ast_type_size(expr_type_t ty) 483 | { 484 | switch(ty) { 485 | case INT8: 486 | return 1; 487 | case INT16: 488 | return 2; 489 | case INT32: 490 | return 4; 491 | case INT64: 492 | return 8; 493 | case UINT8: 494 | return 1; 495 | case UINT16: 496 | return 2; 497 | case UINT32: 498 | return 4; 499 | case UINT64: 500 | return 8; 501 | case F32: 502 | return 4; 503 | case F64: 504 | return 8; 505 | case VOID_T: 506 | return 0; 507 | case STRUC: 508 | return 0; 509 | default: 510 | return -1; 511 | } 512 | } 513 | 514 | void ast_walk(ast_node_t *ast) 515 | { 516 | if ((ast->ty & 0xfff) == INTEGER) { 517 | printf("n: %ld\n", ast->expr.integer); 518 | } else if ((ast->ty) == BINARY) { 519 | printf("("); 520 | ast_walk(ast->expr.binary.left); 521 | printf(" + "); 522 | ast_walk(ast->expr.binary.right); 523 | printf(")\n"); 524 | } 525 | } 526 | -------------------------------------------------------------------------------- /parser.h: 
-------------------------------------------------------------------------------- 1 | #ifndef PARSER_H 2 | #define PARSER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define TYPE_POINTER 0x100 10 | #define TYPE_MASK 0xff 11 | 12 | struct literal_pair { 13 | char *literal; 14 | size_t label; 15 | struct literal_pair *next; 16 | }; 17 | 18 | typedef enum op { 19 | OP_PLUS, 20 | OP_MINUS, 21 | OP_STAR, 22 | OP_SLASH, 23 | 24 | OP_GREATER, 25 | OP_GREATER_EQ, 26 | OP_LESS, 27 | OP_LESS_EQ, 28 | OP_EQUAL, 29 | OP_NOT_EQ, 30 | 31 | OP_NOT, 32 | } op_t; 33 | 34 | typedef enum ast_type { 35 | EXPRESSION, 36 | INTEGER, 37 | DECIMAL, 38 | ID, 39 | STRING_LIT, 40 | UNARY, 41 | BINARY, 42 | FUNCTION, 43 | VAR_DEF, 44 | VAR_ASSIGN, 45 | CALL, 46 | RETURN_STATEMENT, 47 | INLINE_ASM, 48 | } ast_type_t; 49 | 50 | typedef enum expr_type { 51 | INT8, 52 | INT16, 53 | INT32, 54 | INT64, 55 | UINT8, 56 | UINT16, 57 | UINT32, 58 | UINT64, 59 | F32, 60 | F64, 61 | VOID_T, 62 | STRUC, 63 | 64 | MAX = UINT64_MAX, 65 | } expr_type_t; 66 | 67 | struct arg { 68 | char *identifier; 69 | expr_type_t ty; 70 | }; 71 | 72 | typedef struct ast_node { 73 | ast_type_t ty; 74 | expr_type_t expr_ty; 75 | union ast_expr { 76 | uint64_t integer; 77 | double fl; 78 | char *identifier; 79 | size_t string; 80 | struct { 81 | op_t op; 82 | struct ast_node *node; 83 | } unary; 84 | 85 | struct { 86 | struct ast_node *left; 87 | struct ast_node *right; 88 | op_t op; 89 | } binary; 90 | struct { 91 | char *identifier; 92 | size_t arg_count; 93 | struct ast_node **args; 94 | } call; 95 | } expr; 96 | } ast_node_t; 97 | 98 | struct block_member; 99 | 100 | typedef struct ast_statement { 101 | ast_type_t t; 102 | union { 103 | struct { 104 | expr_type_t ty; 105 | char *identifier; 106 | size_t arg_count; 107 | struct arg *args; 108 | struct block_member *block; 109 | } function; 110 | struct { 111 | expr_type_t ty; 112 | char *identifier; 113 | struct ast_statement *assignment; 114 | } 
var_def; 115 | struct { 116 | char *identifier; 117 | ast_node_t *value; 118 | } var_assign; 119 | 120 | struct { 121 | ast_node_t *value; 122 | } ret; 123 | struct { 124 | char *source; 125 | size_t value_count; 126 | ast_node_t **values; 127 | } asm; 128 | ast_node_t *expression; 129 | } statement; 130 | } ast_statement_t; 131 | 132 | struct block_member { 133 | ast_statement_t *value; 134 | size_t stack_size; 135 | struct block_member *next; 136 | }; 137 | 138 | struct statement_list { 139 | ast_statement_t *statement; 140 | struct statement_list *next; 141 | }; 142 | 143 | token_t *next(void); 144 | uint8_t check_token(token_type_t ty); 145 | struct statement_list *ast_parse(void); 146 | ast_node_t *expression(void); 147 | ast_node_t *term(void); 148 | ast_node_t *factor(void); 149 | ast_node_t *ast_call(void); 150 | ast_statement_t *ast_statement(void); 151 | ast_statement_t *ast_function(void); 152 | ast_statement_t *ast_variable(void); 153 | ast_statement_t *ast_return(void); 154 | struct block_member*ast_block(void); 155 | expr_type_t ast_type(void); 156 | size_t ast_type_size(expr_type_t ty); 157 | void ast_walk(ast_node_t *ast); 158 | 159 | #endif 160 | -------------------------------------------------------------------------------- /test.c: -------------------------------------------------------------------------------- 1 | void write(int fd, long int s, int n); 2 | 3 | void test(int a) 4 | { 5 | write(0, "123456789\n", a); 6 | write(0, "\n", 1); 7 | } 8 | 9 | int main(void) 10 | { 11 | test(3 + 5); 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /trie.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void trie_insert(struct trie *t, char *s, int32_t value) 6 | { 7 | if (t->children == NULL) { 8 | t->children = malloc(sizeof(struct trie) * 128); 9 | } 10 | 11 | while ((*s) != '\0') { 12 | if (t->children[*s] == 
NULL) { 13 | t->children[*s] = malloc(sizeof(struct trie)); 14 | t->children[*s]->children = malloc(sizeof(struct trie) * 128); 15 | } 16 | 17 | t = t->children[*s]; 18 | t->value = -1; 19 | s++; 20 | } 21 | 22 | t->value = value; 23 | } 24 | 25 | int32_t trie_get(struct trie *t, char *s) 26 | { 27 | while ((*s) != '\0') { 28 | t = t->children[*s]; 29 | if (t == NULL) return -1; 30 | s++; 31 | } 32 | 33 | return t->value; 34 | } 35 | -------------------------------------------------------------------------------- /trie.h: -------------------------------------------------------------------------------- 1 | #ifndef TRIE_H 2 | #define TRIE_H 3 | 4 | #include 5 | 6 | struct trie { 7 | struct trie **children; 8 | int64_t value; 9 | }; 10 | 11 | void trie_insert(struct trie *t, char *s, int32_t value); 12 | int32_t trie_get(struct trie *t, char *s); 13 | 14 | #endif 15 | --------------------------------------------------------------------------------